1 if (true || '\'); then exec perl -CI "$0" "$@"; fi # ') {}
# ^ sh/perl polyglot bootstrap.  Read as sh, the line is an `if ...; then ...; fi'
#   whose branch re-runs this file with `exec perl -CI "$0" "$@"'.  Read as perl,
#   everything from the first single quote to the last one on the line is a
#   single string literal, leaving an `if (...) {}' with an empty block.
2 # The above uses the fact that backslash isn't special in single quotes in
3 # shell script, whereas in perl it escapes the following single quote.
4 #
5 # The problem it tries to solve is that we want perl to be run with -CI flag
6 # (to have stdin interpreted as utf-8), so we would use `#! /usr/bin/perl -CI',
7 # except that if we do that then perl 5.10 complains about it being too late
8 # to apply -CI if the script is run with `perl -CI ./utf8-to-roff', as we want
9 # to do from the Makefile. The reason we don't do `./utf8-to-roff' from the makefile
10 # is that then we require the #! line to have the right location of perl instead of
11 # just consulting the path. (Similarly, we could use `#! /usr/bin/env perl -CI',
12 # though that still requires that /usr/bin/env exist.) The reason we don't just
13 # remove the `-CI' from the #! line is that then the script couldn't be run correctly
14 # with ./utf8-to-roff.
17 # Converts a man page from utf8 (not understood by groff 1.18) to groff escapes.
18 # I couldn't find any existing tool to convert from utf8 to groff, though I
19 # seem to recall seeing some code to do so somewhere.
20 #
21 # Hereby released into public domain by Peter Moulder.
22 use warnings;
23 use strict;
# Lookup table: Unicode code point (numeric key) => groff escape sequence.
#
# Table generated automatically using:
#   zcat /usr/share/man/man7/groff_char.7.gz |groff -man -Tutf8| col -pb | grep '\\\['|
#     perl -CI -nae 'my ($ch, $seq) = @F; if (ord($ch) >= 128) { printf(" 0x\%x, q{\%s},\n", ord($ch), $seq); }'
# with č (0x10d) manually translated as ˇc (\[ah]c).  (Anyone have a better translation, e.g. using
# overprint?  \[vc] doesn't work, btw.)
# Similarly, ć (0x107) has been manually translated as ´c (\[aa]c).
#
# Manual corrections to the generated output:
#   * \[ua] (up arrow) is keyed on U+2191.  It used to share key 0x21d1 with
#     \[uA], so the single up arrow had no translation at all.
#   * The \[Do] entry (groff's dollar sign) has been dropped.  It was also keyed
#     on 0x21d1 and, being the last assignment to that key, turned U+21D1 into a
#     dollar sign.  `$' itself is ASCII and never consults this table.
#   * Where the generator emitted the same code point twice, only the last
#     assignment ever took effect.  The shadowed entries are kept as comments
#     so the choice is visible; the effective values are unchanged.
my %map = (
    # Miscellaneous Latin letters and ligatures.
    0xd0 => q{\[-D]},
    0xf0 => q{\[Sd]},
    0xde => q{\[TP]},
    0xfe => q{\[Tp]},
    0xdf => q{\[ss]},
    0xfb00 => q{\[ff]},
    0xfb01 => q{\[fi]},
    0xfb02 => q{\[fl]},
    0xfb03 => q{\[Fi]},
    0xfb04 => q{\[Fl]},
    0xc6 => q{\[AE]},
    0xe6 => q{\[ae]},
    0x152 => q{\[OE]},
    0x153 => q{\[oe]},
    0x131 => q{\[.i]},

    # Acute accent.
    0xc1 => q{\['A]},
    0xc9 => q{\['E]},
    0xcd => q{\['I]},
    0xd3 => q{\['O]},
    0xda => q{\['U]},
    0xdd => q{\['Y]},
    0xe1 => q{\['a]},
    0xe9 => q{\['e]},
    0xed => q{\['i]},
    0xf3 => q{\['o]},
    0xfa => q{\['u]},
    0xfd => q{\['y]},

    # Diaeresis.
    0xc4 => q{\[:A]},
    0xcb => q{\[:E]},
    0xcf => q{\[:I]},
    0xd6 => q{\[:O]},
    0xdc => q{\[:U]},
    0x178 => q{\[:Y]},
    0xe4 => q{\[:a]},
    0xeb => q{\[:e]},
    0xef => q{\[:i]},
    0xf6 => q{\[:o]},
    0xfc => q{\[:u]},
    0xff => q{\[:y]},

    # Circumflex.
    0xc2 => q{\[^A]},
    0xca => q{\[^E]},
    0xce => q{\[^I]},
    0xd4 => q{\[^O]},
    0xdb => q{\[^U]},
    0xe2 => q{\[^a]},
    0xea => q{\[^e]},
    0xee => q{\[^i]},
    0xf4 => q{\[^o]},
    0xfb => q{\[^u]},

    # Grave accent.
    0xc0 => q{\[`A]},
    0xc8 => q{\[`E]},
    0xcc => q{\[`I]},
    0xd2 => q{\[`O]},
    0xd9 => q{\[`U]},
    0xe0 => q{\[`a]},
    0xe8 => q{\[`e]},
    0xec => q{\[`i]},
    0xf2 => q{\[`o]},
    0xf9 => q{\[`u]},

    # Tilde.
    0xc3 => q{\[~A]},
    0xd1 => q{\[~N]},
    0xd5 => q{\[~O]},
    0xe3 => q{\[~a]},
    0xf1 => q{\[~n]},
    0xf5 => q{\[~o]},

    # Acute / caron approximations and caron.
    0x107 => q{\[aa]c},    # Added manually; see above.
    0x10d => q{\[ah]c},    # Added manually; see above.
    0x160 => q{\[vS]},
    0x161 => q{\[vs]},
    0x17d => q{\[vZ]},
    0x17e => q{\[vz]},

    # Cedilla, stroke, ring.
    0xc7 => q{\[,C]},
    0xe7 => q{\[,c]},
    0x141 => q{\[/L]},
    0x142 => q{\[/l]},
    0xd8 => q{\[/O]},
    0xf8 => q{\[/o]},
    0xc5 => q{\[oA]},
    0xe5 => q{\[oa]},

    # Free-standing accents.
    0x2dd => q{\[a"]},
    0xaf => q{\[a-]},
    0x2d9 => q{\[a.]},
    0xb4 => q{\[aa]},
    0x2d8 => q{\[ab]},
    0xb8 => q{\[ac]},
    0xa8 => q{\[ad]},
    0x2c7 => q{\[ah]},
    0x2da => q{\[ao]},
    0x2db => q{\[ho]},
    # 0x223c => q{\[ti]},  # shadowed: \[ap] further down uses the same code point

    # Quotation marks.
    0x201e => q{\[Bq]},
    0x201a => q{\[bq]},
    0x201c => q{\[lq]},
    0x201d => q{\[rq]},
    0x2018 => q{\[oq]},
    0x2019 => q{\[cq]},
    0xab => q{\[Fo]},
    0xbb => q{\[Fc]},
    0x2039 => q{\[fo]},
    0x203a => q{\[fc]},

    # Punctuation and brackets.
    0xa1 => q{\[r!]},
    0xbf => q{\[r?]},
    0x2014 => q{\[em]},
    0x2013 => q{\[en]},
    0x2010 => q{\[hy]},
    0x2329 => q{\[la]},
    0x232a => q{\[ra]},

    # Arrows.
    0x2190 => q{\[<-]},
    0x2192 => q{\[->]},
    0x2194 => q{\[<>]},
    0x2193 => q{\[da]},
    0x2191 => q{\[ua]},    # Corrected manually: was keyed on 0x21d1.
    0x21d0 => q{\[lA]},
    0x21d2 => q{\[rA]},
    0x21d4 => q{\[hA]},
    0x21d3 => q{\[dA]},
    0x21d1 => q{\[uA]},

    # Lines and text markers.
    0x2500 => q{\[an]},
    # 0x2502 => q{\[br]},  # shadowed: \[bv] uses the same code point
    0x2502 => q{\[bv]},
    0xa6 => q{\[bb]},
    0x25ef => q{\[ci]},
    # 0xb7 => q{\[bu]},    # shadowed: \[pc] further down uses the same code point
    0x2021 => q{\[dd]},
    0x2020 => q{\[dg]},
    0x25ca => q{\[lz]},
    0x25a1 => q{\[sq]},
    0xb6 => q{\[ps]},
    0xa7 => q{\[sc]},
    0x261c => q{\[lh]},
    0x261e => q{\[rh]},
    0x240d => q{\[CR]},

    # Legal and currency symbols.
    0xa9 => q{\[co]},
    0xae => q{\[rg]},
    0x2122 => q{\[tm]},
    0xa2 => q{\[ct]},
    # 0x20ac => q{\[eu]},  # shadowed: \[Eu] uses the same code point
    0x20ac => q{\[Eu]},
    0xa5 => q{\[Ye]},
    0xa3 => q{\[Po]},
    0xa4 => q{\[Cs]},
    0x192 => q{\[Fn]},

    # Units.
    0xb0 => q{\[de]},
    0x2030 => q{\[%0]},
    0x2032 => q{\[fm]},
    0x2033 => q{\[sd]},
    0xb5 => q{\[mc]},
    0xaa => q{\[Of]},
    0xba => q{\[Om]},

    # Logical symbols.
    0x2227 => q{\[AN]},
    0x2228 => q{\[OR]},
    0xac => q{\[no]},
    0x2203 => q{\[te]},
    0x2200 => q{\[fa]},
    0x220b => q{\[st]},
    # 0x2234 => q{\[3d]},  # shadowed: \[tf] uses the same code point
    0x2234 => q{\[tf]},

    # Mathematical symbols.
    0xbd => q{\[12]},
    0xbc => q{\[14]},
    0xbe => q{\[34]},
    0xb9 => q{\[S1]},
    0xb2 => q{\[S2]},
    0xb3 => q{\[S3]},
    # 0xb1 => q{\[+-]},    # shadowed: \[t+-] uses the same code point
    0xb1 => q{\[t+-]},
    0xb7 => q{\[pc]},
    0x22c5 => q{\[md]},
    # 0xd7 => q{\[mu]},    # shadowed: \[tmu] uses the same code point
    0xd7 => q{\[tmu]},
    0x2297 => q{\[c*]},
    0x2295 => q{\[c+]},
    # 0xf7 => q{\[di]},    # shadowed: \[tdi] uses the same code point
    0xf7 => q{\[tdi]},
    0x2044 => q{\[f/]},
    0x2217 => q{\[**]},
    0x2264 => q{\[<=]},
    0x2265 => q{\[>=]},
    0x2260 => q{\[!=]},
    0x2261 => q{\[==]},
    0x2245 => q{\[=~]},
    0x223c => q{\[ap]},
    # 0x2248 => q{\[~~]},  # shadowed: \[~=] uses the same code point
    0x2248 => q{\[~=]},
    0x221d => q{\[pt]},
    0x2205 => q{\[es]},
    0x2208 => q{\[mo]},
    0x2209 => q{\[nm]},
    0x2284 => q{\[nb]},
    0x2282 => q{\[sb]},
    0x2283 => q{\[sp]},
    0x2286 => q{\[ib]},
    0x2287 => q{\[ip]},
    0x2229 => q{\[ca]},
    0x222a => q{\[cu]},
    0x2220 => q{\[/_]},
    0x22a5 => q{\[pp]},
    0x222b => q{\[is]},
    0x2211 => q{\[sum]},
    0x220f => q{\[product]},
    0x2207 => q{\[gr]},
    0x221a => q{\[sr]},
    0x203e => q{\[rn]},
    0x221e => q{\[if]},
    0x2135 => q{\[Ah]},
    0x2111 => q{\[Im]},
    0x211c => q{\[Re]},
    0x2118 => q{\[wp]},
    0x2202 => q{\[pd]},

    # Greek capitals.
    0x391 => q{\[*A]},
    0x392 => q{\[*B]},
    0x39e => q{\[*C]},
    0x394 => q{\[*D]},
    0x395 => q{\[*E]},
    0x3a6 => q{\[*F]},
    0x393 => q{\[*G]},
    0x398 => q{\[*H]},
    0x399 => q{\[*I]},
    0x39a => q{\[*K]},
    0x39b => q{\[*L]},
    0x39c => q{\[*M]},
    0x39d => q{\[*N]},
    0x39f => q{\[*O]},
    0x3a0 => q{\[*P]},
    0x3a8 => q{\[*Q]},
    0x3a1 => q{\[*R]},
    0x3a3 => q{\[*S]},
    0x3a4 => q{\[*T]},
    0x3a5 => q{\[*U]},
    0x3a9 => q{\[*W]},
    0x3a7 => q{\[*X]},
    0x397 => q{\[*Y]},
    0x396 => q{\[*Z]},

    # Greek lower case.
    0x3b1 => q{\[*a]},
    0x3b2 => q{\[*b]},
    0x3be => q{\[*c]},
    0x3b4 => q{\[*d]},
    0x3b5 => q{\[*e]},
    0x3c6 => q{\[*f]},
    0x3d5 => q{\[+f]},
    0x3b3 => q{\[*g]},
    0x3b8 => q{\[*h]},
    0x3d1 => q{\[+h]},
    0x3b9 => q{\[*i]},
    0x3ba => q{\[*k]},
    0x3bb => q{\[*l]},
    0x3bc => q{\[*m]},
    0x3bd => q{\[*n]},
    0x3bf => q{\[*o]},
    0x3c0 => q{\[*p]},
    0x3d6 => q{\[+p]},
    0x3c8 => q{\[*q]},
    0x3c1 => q{\[*r]},
    0x3c3 => q{\[*s]},
    0x3c4 => q{\[*t]},
    0x3c5 => q{\[*u]},
    0x3c9 => q{\[*w]},
    0x3c7 => q{\[*x]},
    0x3b7 => q{\[*y]},
    0x3b6 => q{\[*z]},
    0x3c2 => q{\[ts]},

    # Card suits.
    0x2663 => q{\[CL]},
    0x2660 => q{\[SP]},
    0x2665 => q{\[HE]},
    0x2666 => q{\[DI]},
);
299 #while(<>) {
300 # s/([^ -~])/(ord($1) < 128 ? $1 : defined($map{$1}) ? $map{$1} : sprintf("\\u%4x", $1))/ge;
301 # print;
302 #}
303 #exit 0;
# Main filter: copy STDIN to STDOUT one character at a time, replacing every
# non-ASCII character with its groff escape from %map.  Dies on the first
# character that has no entry in %map; everything read before it has already
# been printed.

# Decode STDIN as UTF-8 explicitly rather than relying only on the -CI switch.
# When the script is started as plain `perl utf8-to-roff' the switch is never
# applied, and each byte of a multi-byte sequence would be looked up in %map
# as if it were a character of its own.  Setting :utf8 on a handle that
# already has it (because -CI was given) changes nothing.
binmode(STDIN, ':utf8')
    or die "Cannot enable UTF-8 decoding on STDIN: $!";

while (defined(my $ch = getc(STDIN))) {
    my $ord = ord($ch);

    # ASCII is passed through unchanged.
    if ($ord < 128) {
        print $ch;
        next;
    }

    my $out = $map{$ord};
    defined($out)
        or die "Untranslatable character \\u" . sprintf("%X", $ord) . " / `$ch'";

    print $out;
}