-
Notifications
You must be signed in to change notification settings - Fork 0
/
cyr2lat
executable file
·98 lines (92 loc) · 2.44 KB
/
cyr2lat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/sh
# translit - Serbian alphabet transliteration
# transliterate input from UTF-8 Serbian Cyrillic to Latin:
# cyr2lat [ file ... ]
#
# transliterate input from UTF-8 Serbian Latin to 7-bit ASCII:
# lat2ascii [ file ... ]
#
# Requires: AWK that supports UTF-8 (looks for GNU AWK)
#
# Author: Vladimir Vrzić
#
# translit is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# searchpath progname path
#
# Look for an executable regular file called `progname' (or `progname.exe')
# in each directory of `path', a list separated by $PATH_SEPARATOR (":" when
# that variable is unset).  Prints the first match on stdout, or a lone ":"
# when nothing is found.  The exit status is 0 in both cases; callers tell
# the outcomes apart by comparing the output with ":".
#
# The body is a subshell ( ... ) rather than a brace group so that the IFS
# change, `set -f' and the loop variables never leak into the caller.
searchpath() (
  IFS=${PATH_SEPARATOR-":"}
  # Field splitting of $2 is wanted below, pathname expansion is not:
  # a directory name containing * ? or [ must be taken literally.
  set -f
  for dir in $2; do
    # try `progname' with all well known extensions
    for ext in '' '.exe'; do
      # An empty path entry designates the current directory.
      candidate="${dir:-.}/$1$ext"
      if [ -f "$candidate" ] && [ -x "$candidate" ]; then
        printf '%s\n' "$candidate"
        exit 0
      fi
    done
  done
  printf '%s\n' ':'
)
# Resolve the GNU AWK binary through PATH; searchpath answers with a lone
# ":" when no candidate exists, in which case there is nothing to run.
AWK=$(searchpath gawk "$PATH")
case $AWK in
  :)
    echo >&2 "$0: Cannot find gawk in PATH"
    exit 1
    ;;
esac
# AWK BEGIN block for cyr2lat mode: fills the lookup table m[] with the
# Serbian Cyrillic -> Latin correspondences and sets FS to the empty string
# so that every character of an input line becomes its own field.
awk_cyr2lat_map='
BEGIN {
	# Upper case letters, in Cyrillic alphabet order.
	m["А"] = "A"
	m["Б"] = "B"
	m["В"] = "V"
	m["Г"] = "G"
	m["Д"] = "D"
	m["Ђ"] = "Đ"
	m["Е"] = "E"
	m["Ж"] = "Ž"
	m["З"] = "Z"
	m["И"] = "I"
	m["Ј"] = "J"
	m["К"] = "K"
	m["Л"] = "L"
	m["Љ"] = "Lj"
	m["М"] = "M"
	m["Н"] = "N"
	m["Њ"] = "Nj"
	m["О"] = "O"
	m["П"] = "P"
	m["Р"] = "R"
	m["С"] = "S"
	m["Т"] = "T"
	m["Ћ"] = "Ć"
	m["У"] = "U"
	m["Ф"] = "F"
	m["Х"] = "H"
	m["Ц"] = "C"
	m["Ч"] = "Č"
	m["Џ"] = "Dž"
	m["Ш"] = "Š"

	# Lower case letters, same order.
	m["а"] = "a"
	m["б"] = "b"
	m["в"] = "v"
	m["г"] = "g"
	m["д"] = "d"
	m["ђ"] = "đ"
	m["е"] = "e"
	m["ж"] = "ž"
	m["з"] = "z"
	m["и"] = "i"
	m["ј"] = "j"
	m["к"] = "k"
	m["л"] = "l"
	m["љ"] = "lj"
	m["м"] = "m"
	m["н"] = "n"
	m["њ"] = "nj"
	m["о"] = "o"
	m["п"] = "p"
	m["р"] = "r"
	m["с"] = "s"
	m["т"] = "t"
	m["ћ"] = "ć"
	m["у"] = "u"
	m["ф"] = "f"
	m["х"] = "h"
	m["ц"] = "c"
	m["ч"] = "č"
	m["џ"] = "dž"
	m["ш"] = "š"

	# Empty field separator: one field per character.
	FS = ""
}
'
# AWK BEGIN block for lat2ascii mode: fills the lookup table m[] with the
# plain ASCII stand-ins for the Serbian Latin letters that carry diacritics
# and sets FS to the empty string so that every character of an input line
# becomes its own field.
awk_lat2ascii_map='
BEGIN {
	# Upper case letters.
	m["Č"] = "C"
	m["Ć"] = "C"
	m["Đ"] = "Dj"
	m["Š"] = "S"
	m["Ž"] = "Z"

	# Lower case letters.
	m["č"] = "c"
	m["ć"] = "c"
	m["đ"] = "dj"
	m["š"] = "s"
	m["ž"] = "z"

	# Empty field separator: one field per character.
	FS = ""
}
'
# AWK main rule shared by both modes.  It expects the BEGIN block placed in
# front of it to have filled m[] and to have set FS to the empty string, so
# that the fields of a record are its individual characters.  Each character
# with an entry in m[] is replaced by that entry; every other character is
# copied through unchanged.  Every input line yields exactly one output line.
awk_each_line='
{
	for (i = 1; i <= NF; i++) {
		# Use the "in" membership test: a plain m[$i] reference would
		# insert an empty element into m for every unmapped character,
		# and would also skip a mapping whose value is empty or "0".
		printf("%s", (($i in m) ? m[$i] : $i))
	}
	printf("\n")
}
'
# Pick the mapping table from the name the script was invoked under:
# `lat2ascii' selects the Latin -> ASCII table, any other name (normally
# `cyr2lat') selects the Cyrillic -> Latin table.  ${0##*/} strips the
# directory part without forking basename.
case ${0##*/} in
  lat2ascii) awk_begin=$awk_lat2ascii_map ;;
  *)         awk_begin=$awk_cyr2lat_map ;;
esac

# Replace this shell with gawk, handing over the file operands untouched.
# "$AWK" is quoted so that a gawk path containing whitespace or glob
# characters is passed as a single word.
exec "$AWK" "$awk_begin $awk_each_line" "$@"