1 #!@INTLTOOL_PERL@ -w
2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
4 #
5 # The Intltool Message Merger
6 #
7 # Copyright (C) 2000, 2003 Free Software Foundation.
8 # Copyright (C) 2000, 2001 Eazel, Inc
9 #
10 # Intltool is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU General Public License
12 # version 2 published by the Free Software Foundation.
13 #
14 # Intltool is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #
23 # As a special exception to the GNU General Public License, if you
24 # distribute this file as part of a program that contains a
25 # configuration script generated by Autoconf, you may include it under
26 # the same distribution terms that you use for the rest of that program.
27 #
28 # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
29 # Kenneth Christiansen <kenneth@gnu.org>
30 # Darin Adler <darin@bentspoon.com>
31 #
32 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33 #
35 ## Release information
36 my $PROGRAM = "intltool-merge";
37 my $PACKAGE = "intltool";
38 my $VERSION = "0.37.0";
40 ## Loaded modules
41 use strict;
42 use Getopt::Long;
43 use Text::Wrap;
44 use File::Basename;
46 my $must_end_tag = -1;
47 my $last_depth = -1;
48 my $translation_depth = -1;
49 my @tag_stack = ();
50 my @entered_tag = ();
51 my @translation_strings = ();
52 my $leading_space = "";
54 ## Scalars used by the option stuff
55 my $HELP_ARG = 0;
56 my $VERSION_ARG = 0;
57 my $BA_STYLE_ARG = 0;
58 my $XML_STYLE_ARG = 0;
59 my $KEYS_STYLE_ARG = 0;
60 my $DESKTOP_STYLE_ARG = 0;
61 my $SCHEMAS_STYLE_ARG = 0;
62 my $RFC822DEB_STYLE_ARG = 0;
63 my $QUOTED_STYLE_ARG = 0;
64 my $QUOTEDXML_STYLE_ARG = 0;
65 my $QUIET_ARG = 0;
66 my $PASS_THROUGH_ARG = 0;
67 my $UTF8_ARG = 0;
68 my $MULTIPLE_OUTPUT = 0;
69 my $cache_file;
71 ## Handle options
72 GetOptions
73 (
74 "help" => \$HELP_ARG,
75 "version" => \$VERSION_ARG,
76 "quiet|q" => \$QUIET_ARG,
77 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
78 "ba-style|b" => \$BA_STYLE_ARG,
79 "xml-style|x" => \$XML_STYLE_ARG,
80 "keys-style|k" => \$KEYS_STYLE_ARG,
81 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
82 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
83 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
84 "quoted-style" => \$QUOTED_STYLE_ARG,
85 "quotedxml-style" => \$QUOTEDXML_STYLE_ARG,
86 "pass-through|p" => \$PASS_THROUGH_ARG,
87 "utf8|u" => \$UTF8_ARG,
88 "multiple-output|m" => \$MULTIPLE_OUTPUT,
89 "cache|c=s" => \$cache_file
90 ) or &error;
92 my $PO_DIR;
93 my $FILE;
94 my $OUTFILE;
96 my %po_files_by_lang = ();
97 my %translations = ();
98 my $iconv = $ENV{"ICONV"} || "iconv";
99 my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
101 sub isProgramInPath
102 {
103 my ($file) = @_;
104 # If either a file exists, or when run it returns 0 exit status
105 return 1 if ((-x $file) or (system("$file -l >$devnull") == 0));
106 return 0;
107 }
109 if (! isProgramInPath ("$iconv"))
110 {
111 print STDERR " *** iconv is not found on this system!\n".
112 " *** Without it, intltool-merge can not convert encodings.\n";
113 exit;
114 }
116 # Use this instead of \w for XML files to handle more possible characters.
117 my $w = "[-A-Za-z0-9._:]";
119 # XML quoted string contents
120 my $q = "[^\\\"]*";
122 ## Check for options.
124 if ($VERSION_ARG)
125 {
126 &print_version;
127 }
128 elsif ($HELP_ARG)
129 {
130 &print_help;
131 }
132 elsif ($BA_STYLE_ARG && @ARGV > 2)
133 {
134 &utf8_sanity_check;
135 &preparation;
136 &print_message;
137 &ba_merge_translations;
138 &finalize;
139 }
140 elsif ($XML_STYLE_ARG && @ARGV > 2)
141 {
142 &utf8_sanity_check;
143 &preparation;
144 &print_message;
145 &xml_merge_output;
146 &finalize;
147 }
148 elsif ($KEYS_STYLE_ARG && @ARGV > 2)
149 {
150 &utf8_sanity_check;
151 &preparation;
152 &print_message;
153 &keys_merge_translations;
154 &finalize;
155 }
156 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
157 {
158 &utf8_sanity_check;
159 &preparation;
160 &print_message;
161 &desktop_merge_translations;
162 &finalize;
163 }
164 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
165 {
166 &utf8_sanity_check;
167 &preparation;
168 &print_message;
169 &schemas_merge_translations;
170 &finalize;
171 }
172 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
173 {
174 &preparation;
175 &print_message;
176 &rfc822deb_merge_translations;
177 &finalize;
178 }
179 elsif (($QUOTED_STYLE_ARG || $QUOTEDXML_STYLE_ARG) && @ARGV > 2)
180 {
181 &utf8_sanity_check;
182 &preparation;
183 &print_message;
184 "ed_merge_translations($QUOTEDXML_STYLE_ARG);
185 &finalize;
186 }
187 else
188 {
189 &print_help;
190 }
192 exit;
194 ## Sub for printing release information
195 sub print_version
196 {
197 print <<_EOF_;
198 ${PROGRAM} (${PACKAGE}) ${VERSION}
199 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
201 Copyright (C) 2000-2003 Free Software Foundation, Inc.
202 Copyright (C) 2000-2001 Eazel, Inc.
203 This is free software; see the source for copying conditions. There is NO
204 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
205 _EOF_
206 exit;
207 }
209 ## Sub for printing usage information
210 sub print_help
211 {
212 print <<_EOF_;
213 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
214 Generates an output file that includes some localized attributes from an
215 untranslated source file.
217 Mandatory options: (exactly one must be specified)
218 -b, --ba-style includes translations in the bonobo-activation style
219 -d, --desktop-style includes translations in the desktop style
220 -k, --keys-style includes translations in the keys style
221 -s, --schemas-style includes translations in the schemas style
222 -r, --rfc822deb-style includes translations in the RFC822 style
223 --quoted-style includes translations in the quoted string style
224 --quotedxml-style includes translations in the quoted xml string style
225 -x, --xml-style includes translations in the standard xml style
227 Other options:
228 -u, --utf8 convert all strings to UTF-8 before merging
229 (default for everything except RFC822 style)
230 -p, --pass-through deprecated, does nothing and issues a warning
231 -m, --multiple-output output one localized file per locale, instead of
232 a single file containing all localized elements
233 -c, --cache=FILE specify cache file name
234 (usually \$top_builddir/po/.intltool-merge-cache)
235 -q, --quiet suppress most messages
236 --help display this help and exit
237 --version output version information and exit
239 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
240 or send email to <xml-i18n-tools\@gnome.org>.
241 _EOF_
242 exit;
243 }
246 ## Sub for printing error messages
247 sub print_error
248 {
249 print STDERR "Try `${PROGRAM} --help' for more information.\n";
250 exit;
251 }
254 sub print_message
255 {
256 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
257 }
260 sub preparation
261 {
262 $PO_DIR = $ARGV[0];
263 $FILE = $ARGV[1];
264 $OUTFILE = $ARGV[2];
266 &gather_po_files;
267 &get_translation_database;
268 }
270 # General-purpose code for looking up translations in .po files
272 sub po_file2lang
273 {
274 my ($tmp) = @_;
275 $tmp =~ s/^.*\/(.*)\.po$/$1/;
276 return $tmp;
277 }
279 sub gather_po_files
280 {
281 if (my $linguas = $ENV{"LINGUAS"})
282 {
283 for my $lang (split / /, $linguas) {
284 my $po_file = $PO_DIR . "/" . $lang . ".po";
285 if (-e $po_file) {
286 $po_files_by_lang{$lang} = $po_file;
287 }
288 }
289 }
290 else
291 {
292 if (open LINGUAS_FILE, "$PO_DIR/LINGUAS")
293 {
294 while (<LINGUAS_FILE>)
295 {
296 next if /^#/;
298 for my $lang (split)
299 {
300 chomp ($lang);
301 my $po_file = $PO_DIR . "/" . $lang . ".po";
302 if (-e $po_file) {
303 $po_files_by_lang{$lang} = $po_file;
304 }
305 }
306 }
308 close LINGUAS_FILE;
309 }
310 else
311 {
312 for my $po_file (glob "$PO_DIR/*.po") {
313 $po_files_by_lang{po_file2lang($po_file)} = $po_file;
314 }
315 }
316 }
317 }
319 sub get_local_charset
320 {
321 my ($encoding) = @_;
322 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
324 # seek character encoding aliases in charset.alias (glib)
326 if (open CHARSET_ALIAS, $alias_file)
327 {
328 while (<CHARSET_ALIAS>)
329 {
330 next if /^\#/;
331 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
332 }
334 close CHARSET_ALIAS;
335 }
337 # if not found, return input string
339 return $encoding;
340 }
342 sub get_po_encoding
343 {
344 my ($in_po_file) = @_;
345 my $encoding = "";
347 open IN_PO_FILE, $in_po_file or die;
348 while (<IN_PO_FILE>)
349 {
350 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
351 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
352 {
353 $encoding = $1;
354 last;
355 }
356 }
357 close IN_PO_FILE;
359 if (!$encoding)
360 {
361 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
362 $encoding = "ISO-8859-1";
363 }
365 system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
366 if ($?) {
367 $encoding = get_local_charset($encoding);
368 }
370 return $encoding
371 }
373 sub utf8_sanity_check
374 {
375 print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
376 $UTF8_ARG = 1;
377 }
379 sub get_translation_database
380 {
381 if ($cache_file) {
382 &get_cached_translation_database;
383 } else {
384 &create_translation_database;
385 }
386 }
388 sub get_newest_po_age
389 {
390 my $newest_age;
392 foreach my $file (values %po_files_by_lang)
393 {
394 my $file_age = -M $file;
395 $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
396 }
398 $newest_age = 0 if !$newest_age;
400 return $newest_age;
401 }
403 sub create_cache
404 {
405 print "Generating and caching the translation database\n" unless $QUIET_ARG;
407 &create_translation_database;
409 open CACHE, ">$cache_file" || die;
410 print CACHE join "\x01", %translations;
411 close CACHE;
412 }
414 sub load_cache
415 {
416 print "Found cached translation database\n" unless $QUIET_ARG;
418 my $contents;
419 open CACHE, "<$cache_file" || die;
420 {
421 local $/;
422 $contents = <CACHE>;
423 }
424 close CACHE;
425 %translations = split "\x01", $contents;
426 }
428 sub get_cached_translation_database
429 {
430 my $cache_file_age = -M $cache_file;
431 if (defined $cache_file_age)
432 {
433 if ($cache_file_age <= &get_newest_po_age)
434 {
435 &load_cache;
436 return;
437 }
438 print "Found too-old cached translation database\n" unless $QUIET_ARG;
439 }
441 &create_cache;
442 }
444 sub create_translation_database
445 {
446 for my $lang (keys %po_files_by_lang)
447 {
448 my $po_file = $po_files_by_lang{$lang};
450 if ($UTF8_ARG)
451 {
452 my $encoding = get_po_encoding ($po_file);
454 if (lc $encoding eq "utf-8")
455 {
456 open PO_FILE, "<$po_file";
457 }
458 else
459 {
460 print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
462 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
463 }
464 }
465 else
466 {
467 open PO_FILE, "<$po_file";
468 }
470 my $nextfuzzy = 0;
471 my $inmsgid = 0;
472 my $inmsgstr = 0;
473 my $msgid = "";
474 my $msgstr = "";
476 while (<PO_FILE>)
477 {
478 $nextfuzzy = 1 if /^#, fuzzy/;
480 if (/^msgid "((\\.|[^\\]+)*)"/ )
481 {
482 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
483 $msgid = "";
484 $msgstr = "";
486 if ($nextfuzzy) {
487 $inmsgid = 0;
488 } else {
489 $msgid = unescape_po_string($1);
490 $inmsgid = 1;
491 }
492 $inmsgstr = 0;
493 $nextfuzzy = 0;
494 }
496 if (/^msgstr "((\\.|[^\\]+)*)"/)
497 {
498 $msgstr = unescape_po_string($1);
499 $inmsgstr = 1;
500 $inmsgid = 0;
501 }
503 if (/^"((\\.|[^\\]+)*)"/)
504 {
505 $msgid .= unescape_po_string($1) if $inmsgid;
506 $msgstr .= unescape_po_string($1) if $inmsgstr;
507 }
508 }
509 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
510 }
511 }
513 sub finalize
514 {
515 }
517 sub unescape_one_sequence
518 {
519 my ($sequence) = @_;
521 return "\\" if $sequence eq "\\\\";
522 return "\"" if $sequence eq "\\\"";
523 return "\n" if $sequence eq "\\n";
524 return "\r" if $sequence eq "\\r";
525 return "\t" if $sequence eq "\\t";
526 return "\b" if $sequence eq "\\b";
527 return "\f" if $sequence eq "\\f";
528 return "\a" if $sequence eq "\\a";
529 return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
531 return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
532 return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
534 # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
536 return $sequence;
537 }
539 sub unescape_po_string
540 {
541 my ($string) = @_;
543 $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
545 return $string;
546 }
548 sub entity_decode
549 {
550 local ($_) = @_;
552 s/'/'/g; # '
553 s/"/"/g; # "
554 s/</</g;
555 s/>/>/g;
556 s/&/&/g;
558 return $_;
559 }
561 # entity_encode: (string)
562 #
563 # Encode the given string to XML format (encode '<' etc).
565 sub entity_encode
566 {
567 my ($pre_encoded) = @_;
569 my @list_of_chars = unpack ('C*', $pre_encoded);
571 # with UTF-8 we only encode minimalistic
572 return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
573 }
575 sub entity_encode_int_minimalist
576 {
577 return """ if $_ == 34;
578 return "&" if $_ == 38;
579 return "'" if $_ == 39;
580 return "<" if $_ == 60;
581 return ">" if $_ == 62;
582 return chr $_;
583 }
585 sub entity_encoded_translation
586 {
587 my ($lang, $string) = @_;
589 my $translation = $translations{$lang, $string};
590 return $string if !$translation;
591 return entity_encode ($translation);
592 }
594 ## XML (bonobo-activation specific) merge code
596 sub ba_merge_translations
597 {
598 my $source;
600 {
601 local $/; # slurp mode
602 open INPUT, "<$FILE" or die "can't open $FILE: $!";
603 $source = <INPUT>;
604 close INPUT;
605 }
607 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
608 # Binmode so that selftest works ok if using a native Win32 Perl...
609 binmode (OUTPUT) if $^O eq 'MSWin32';
611 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
612 {
613 print OUTPUT $1;
615 my $node = $2 . "\n";
617 my @strings = ();
618 $_ = $node;
619 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
620 push @strings, entity_decode($3);
621 }
622 print OUTPUT;
624 my %langs;
625 for my $string (@strings)
626 {
627 for my $lang (keys %po_files_by_lang)
628 {
629 $langs{$lang} = 1 if $translations{$lang, $string};
630 }
631 }
633 for my $lang (sort keys %langs)
634 {
635 $_ = $node;
636 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
637 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
638 print OUTPUT;
639 }
640 }
642 print OUTPUT $source;
644 close OUTPUT;
645 }
648 ## XML (non-bonobo-activation) merge code
651 # Process tag attributes
652 # Only parameter is a HASH containing attributes -> values mapping
653 sub getAttributeString
654 {
655 my $sub = shift;
656 my $do_translate = shift || 0;
657 my $language = shift || "";
658 my $result = "";
659 my $translate = shift;
660 foreach my $e (reverse(sort(keys %{ $sub }))) {
661 my $key = $e;
662 my $string = $sub->{$e};
663 my $quote = '"';
665 $string =~ s/^[\s]+//;
666 $string =~ s/[\s]+$//;
668 if ($string =~ /^'.*'$/)
669 {
670 $quote = "'";
671 }
672 $string =~ s/^['"]//g;
673 $string =~ s/['"]$//g;
675 if ($do_translate && $key =~ /^_/) {
676 $key =~ s|^_||g;
677 if ($language) {
678 # Handle translation
679 my $decode_string = entity_decode($string);
680 my $translation = $translations{$language, $decode_string};
681 if ($translation) {
682 $translation = entity_encode($translation);
683 $string = $translation;
684 }
685 $$translate = 2;
686 } else {
687 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
688 }
689 }
691 $result .= " $key=$quote$string$quote";
692 }
693 return $result;
694 }
696 # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
697 sub getXMLstring
698 {
699 my $ref = shift;
700 my $spacepreserve = shift || 0;
701 my @list = @{ $ref };
702 my $result = "";
704 my $count = scalar(@list);
705 my $attrs = $list[0];
706 my $index = 1;
708 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
709 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
711 while ($index < $count) {
712 my $type = $list[$index];
713 my $content = $list[$index+1];
714 if (! $type ) {
715 # We've got CDATA
716 if ($content) {
717 # lets strip the whitespace here, and *ONLY* here
718 $content =~ s/\s+/ /gs if (!$spacepreserve);
719 $result .= $content;
720 }
721 } elsif ( "$type" ne "1" ) {
722 # We've got another element
723 $result .= "<$type";
724 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
725 if ($content) {
726 my $subresult = getXMLstring($content, $spacepreserve);
727 if ($subresult) {
728 $result .= ">".$subresult . "</$type>";
729 } else {
730 $result .= "/>";
731 }
732 } else {
733 $result .= "/>";
734 }
735 }
736 $index += 2;
737 }
738 return $result;
739 }
741 # Translate list of nodes if necessary
742 sub translate_subnodes
743 {
744 my $fh = shift;
745 my $content = shift;
746 my $language = shift || "";
747 my $singlelang = shift || 0;
748 my $spacepreserve = shift || 0;
750 my @nodes = @{ $content };
752 my $count = scalar(@nodes);
753 my $index = 0;
754 while ($index < $count) {
755 my $type = $nodes[$index];
756 my $rest = $nodes[$index+1];
757 if ($singlelang) {
758 my $oldMO = $MULTIPLE_OUTPUT;
759 $MULTIPLE_OUTPUT = 1;
760 traverse($fh, $type, $rest, $language, $spacepreserve);
761 $MULTIPLE_OUTPUT = $oldMO;
762 } else {
763 traverse($fh, $type, $rest, $language, $spacepreserve);
764 }
765 $index += 2;
766 }
767 }
769 sub isWellFormedXmlFragment
770 {
771 my $ret = eval 'require XML::Parser';
772 if(!$ret) {
773 die "You must have XML::Parser installed to run $0\n\n";
774 }
776 my $fragment = shift;
777 return 0 if (!$fragment);
779 $fragment = "<root>$fragment</root>";
780 my $xp = new XML::Parser(Style => 'Tree');
781 my $tree = 0;
782 eval { $tree = $xp->parse($fragment); };
783 return $tree;
784 }
786 sub traverse
787 {
788 my $fh = shift;
789 my $nodename = shift;
790 my $content = shift;
791 my $language = shift || "";
792 my $spacepreserve = shift || 0;
794 if (!$nodename) {
795 if ($content =~ /^[\s]*$/) {
796 $leading_space .= $content;
797 }
798 print $fh $content;
799 } else {
800 # element
801 my @all = @{ $content };
802 my $attrs = shift @all;
803 my $translate = 0;
804 my $outattr = getAttributeString($attrs, 1, $language, \$translate);
806 if ($nodename =~ /^_/) {
807 $translate = 1;
808 $nodename =~ s/^_//;
809 }
810 my $lookup = '';
812 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
813 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
815 print $fh "<$nodename", $outattr;
816 if ($translate) {
817 $lookup = getXMLstring($content, $spacepreserve);
818 if (!$spacepreserve) {
819 $lookup =~ s/^\s+//s;
820 $lookup =~ s/\s+$//s;
821 }
823 if ($lookup || $translate == 2) {
824 my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
825 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
826 $translation = $lookup if (!$translation);
827 print $fh " xml:lang=\"", $language, "\"" if $language;
828 print $fh ">";
829 if ($translate == 2) {
830 translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
831 } else {
832 print $fh $translation;
833 }
834 print $fh "</$nodename>";
836 return; # this means there will be no same translation with xml:lang="$language"...
837 # if we want them both, just remove this "return"
838 } else {
839 print $fh ">";
840 if ($translate == 2) {
841 translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
842 } else {
843 print $fh $lookup;
844 }
845 print $fh "</$nodename>";
846 }
847 } else {
848 print $fh "/>";
849 }
851 for my $lang (sort keys %po_files_by_lang) {
852 if ($MULTIPLE_OUTPUT && $lang ne "$language") {
853 next;
854 }
855 if ($lang) {
856 # Handle translation
857 #
858 my $translate = 0;
859 my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
860 my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
861 if ($translate && !$translation) {
862 $translation = $lookup;
863 }
865 if ($translation || $translate) {
866 print $fh "\n";
867 $leading_space =~ s/.*\n//g;
868 print $fh $leading_space;
869 print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
870 if ($translate == 2) {
871 translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
872 } else {
873 print $fh $translation;
874 }
875 print $fh "</$nodename>";
876 }
877 }
878 }
880 } else {
881 my $count = scalar(@all);
882 if ($count > 0) {
883 print $fh ">";
884 my $index = 0;
885 while ($index < $count) {
886 my $type = $all[$index];
887 my $rest = $all[$index+1];
888 traverse($fh, $type, $rest, $language, $spacepreserve);
889 $index += 2;
890 }
891 print $fh "</$nodename>";
892 } else {
893 print $fh "/>";
894 }
895 }
896 }
897 }
899 sub intltool_tree_comment
900 {
901 my $expat = shift;
902 my $data = shift;
903 my $clist = $expat->{Curlist};
904 my $pos = $#$clist;
906 push @$clist, 1 => $data;
907 }
909 sub intltool_tree_cdatastart
910 {
911 my $expat = shift;
912 my $clist = $expat->{Curlist};
913 my $pos = $#$clist;
915 push @$clist, 0 => $expat->original_string();
916 }
918 sub intltool_tree_cdataend
919 {
920 my $expat = shift;
921 my $clist = $expat->{Curlist};
922 my $pos = $#$clist;
924 $clist->[$pos] .= $expat->original_string();
925 }
927 sub intltool_tree_char
928 {
929 my $expat = shift;
930 my $text = shift;
931 my $clist = $expat->{Curlist};
932 my $pos = $#$clist;
934 # Use original_string so that we retain escaped entities
935 # in CDATA sections.
936 #
937 if ($pos > 0 and $clist->[$pos - 1] eq '0') {
938 $clist->[$pos] .= $expat->original_string();
939 } else {
940 push @$clist, 0 => $expat->original_string();
941 }
942 }
944 sub intltool_tree_start
945 {
946 my $expat = shift;
947 my $tag = shift;
948 my @origlist = ();
950 # Use original_string so that we retain escaped entities
951 # in attribute values. We must convert the string to an
952 # @origlist array to conform to the structure of the Tree
953 # Style.
954 #
955 my @original_array = split /\x/, $expat->original_string();
956 my $source = $expat->original_string();
958 # Remove leading tag.
959 #
960 $source =~ s|^\s*<\s*(\S+)||s;
962 # Grab attribute key/value pairs and push onto @origlist array.
963 #
964 while ($source)
965 {
966 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
967 {
968 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
969 push @origlist, $1;
970 push @origlist, '"' . $2 . '"';
971 }
972 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
973 {
974 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
975 push @origlist, $1;
976 push @origlist, "'" . $2 . "'";
977 }
978 else
979 {
980 last;
981 }
982 }
984 my $ol = [ { @origlist } ];
986 push @{ $expat->{Lists} }, $expat->{Curlist};
987 push @{ $expat->{Curlist} }, $tag => $ol;
988 $expat->{Curlist} = $ol;
989 }
991 sub readXml
992 {
993 my $filename = shift || return;
994 if(!-f $filename) {
995 die "ERROR Cannot find filename: $filename\n";
996 }
998 my $ret = eval 'require XML::Parser';
999 if(!$ret) {
1000 die "You must have XML::Parser installed to run $0\n\n";
1001 }
1002 my $xp = new XML::Parser(Style => 'Tree');
1003 $xp->setHandlers(Char => \&intltool_tree_char);
1004 $xp->setHandlers(Start => \&intltool_tree_start);
1005 $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
1006 $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
1007 my $tree = $xp->parsefile($filename);
1009 # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
1010 # would be:
1011 # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
1012 # 0, "Howdy", ref, [{}]], 0, "do" ] ]
1014 return $tree;
1015 }
1017 sub print_header
1018 {
1019 my $infile = shift;
1020 my $fh = shift;
1021 my $source;
1023 if(!-f $infile) {
1024 die "ERROR Cannot find filename: $infile\n";
1025 }
1027 print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
1028 {
1029 local $/;
1030 open DOCINPUT, "<${FILE}" or die;
1031 $source = <DOCINPUT>;
1032 close DOCINPUT;
1033 }
1034 if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
1035 {
1036 print $fh "$1\n";
1037 }
1038 elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
1039 {
1040 print $fh "$1\n";
1041 }
1042 }
1044 sub parseTree
1045 {
1046 my $fh = shift;
1047 my $ref = shift;
1048 my $language = shift || "";
1050 my $name = shift @{ $ref };
1051 my $cont = shift @{ $ref };
1053 while (!$name || "$name" eq "1") {
1054 $name = shift @{ $ref };
1055 $cont = shift @{ $ref };
1056 }
1058 my $spacepreserve = 0;
1059 my $attrs = @{$cont}[0];
1060 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
1062 traverse($fh, $name, $cont, $language, $spacepreserve);
1063 }
1065 sub xml_merge_output
1066 {
1067 my $source;
1069 if ($MULTIPLE_OUTPUT) {
1070 for my $lang (sort keys %po_files_by_lang) {
1071 if ( ! -d $lang ) {
1072 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
1073 }
1074 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1075 binmode (OUTPUT) if $^O eq 'MSWin32';
1076 my $tree = readXml($FILE);
1077 print_header($FILE, \*OUTPUT);
1078 parseTree(\*OUTPUT, $tree, $lang);
1079 close OUTPUT;
1080 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1081 }
1082 if ( ! -d "C" ) {
1083 mkdir "C" or -d "C" or die "Cannot create subdirectory C: $!\n";
1084 }
1085 open OUTPUT, ">C/$OUTFILE" or die "Cannot open C/$OUTFILE: $!\n";
1086 binmode (OUTPUT) if $^O eq 'MSWin32';
1087 my $tree = readXml($FILE);
1088 print_header($FILE, \*OUTPUT);
1089 parseTree(\*OUTPUT, $tree);
1090 close OUTPUT;
1091 print "CREATED C/$OUTFILE\n" unless $QUIET_ARG;
1092 } else {
1093 open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
1094 binmode (OUTPUT) if $^O eq 'MSWin32';
1095 my $tree = readXml($FILE);
1096 print_header($FILE, \*OUTPUT);
1097 parseTree(\*OUTPUT, $tree);
1098 close OUTPUT;
1099 print "CREATED $OUTFILE\n" unless $QUIET_ARG;
1100 }
1101 }
1103 sub keys_merge_translation
1104 {
1105 my ($lang) = @_;
1107 if ( ! -d $lang && $MULTIPLE_OUTPUT)
1108 {
1109 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
1110 }
1112 open INPUT, "<${FILE}" or die "Cannot open ${FILE}: $!\n";
1113 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1114 binmode (OUTPUT) if $^O eq 'MSWin32';
1116 while (<INPUT>)
1117 {
1118 if (s/^(\s*)_(\w+=(.*))/$1$2/)
1119 {
1120 my $string = $3;
1122 if (!$MULTIPLE_OUTPUT)
1123 {
1124 print OUTPUT;
1126 my $non_translated_line = $_;
1128 for my $lang (sort keys %po_files_by_lang)
1129 {
1130 my $translation = $translations{$lang, $string};
1131 next if !$translation;
1133 $_ = $non_translated_line;
1134 s/(\w+)=.*/[$lang]$1=$translation/;
1135 print OUTPUT;
1136 }
1137 }
1138 else
1139 {
1140 my $non_translated_line = $_;
1141 my $translation = $translations{$lang, $string};
1142 $translation = $string if !$translation;
1144 $_ = $non_translated_line;
1145 s/(\w+)=.*/$1=$translation/;
1146 print OUTPUT;
1147 }
1148 }
1149 else
1150 {
1151 print OUTPUT;
1152 }
1153 }
1155 close OUTPUT;
1156 close INPUT;
1158 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1159 }
1161 sub keys_merge_translations
1162 {
1163 if ($MULTIPLE_OUTPUT)
1164 {
1165 for my $lang (sort keys %po_files_by_lang)
1166 {
1167 keys_merge_translation ($lang);
1168 }
1169 keys_merge_translation ("C");
1170 }
1171 else
1172 {
1173 keys_merge_translation (".");
1174 }
1175 }
1177 sub desktop_merge_translations
1178 {
1179 open INPUT, "<${FILE}" or die;
1180 open OUTPUT, ">${OUTFILE}" or die;
1181 binmode (OUTPUT) if $^O eq 'MSWin32';
1183 while (<INPUT>)
1184 {
1185 if (s/^(\s*)_(\w+=(.*))/$1$2/)
1186 {
1187 my $string = $3;
1189 print OUTPUT;
1191 my $non_translated_line = $_;
1193 for my $lang (sort keys %po_files_by_lang)
1194 {
1195 my $translation = $translations{$lang, $string};
1196 next if !$translation;
1198 $_ = $non_translated_line;
1199 s/(\w+)=.*/${1}[$lang]=$translation/;
1200 print OUTPUT;
1201 }
1202 }
1203 else
1204 {
1205 print OUTPUT;
1206 }
1207 }
1209 close OUTPUT;
1210 close INPUT;
1211 }
1213 sub schemas_merge_translations
1214 {
1215 my $source;
1217 {
1218 local $/; # slurp mode
1219 open INPUT, "<$FILE" or die "can't open $FILE: $!";
1220 $source = <INPUT>;
1221 close INPUT;
1222 }
1224 open OUTPUT, ">$OUTFILE" or die;
1225 binmode (OUTPUT) if $^O eq 'MSWin32';
1227 # FIXME: support attribute translations
1229 # Empty nodes never need translation, so unmark all of them.
1230 # For example, <_foo/> is just replaced by <foo/>.
1231 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1233 while ($source =~ s/
1234 (.*?)
1235 (\s+)(<locale\ name="C">(\s*)
1236 (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1237 (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1238 (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1239 <\/locale>)
1240 //sx)
1241 {
1242 print OUTPUT $1;
1244 my $locale_start_spaces = $2 ? $2 : '';
1245 my $default_spaces = $4 ? $4 : '';
1246 my $short_spaces = $7 ? $7 : '';
1247 my $long_spaces = $10 ? $10 : '';
1248 my $locale_end_spaces = $13 ? $13 : '';
1249 my $c_default_block = $3 ? $3 : '';
1250 my $default_string = $6 ? $6 : '';
1251 my $short_string = $9 ? $9 : '';
1252 my $long_string = $12 ? $12 : '';
1254 print OUTPUT "$locale_start_spaces$c_default_block";
1256 $default_string =~ s/\s+/ /g;
1257 $default_string = entity_decode($default_string);
1258 $short_string =~ s/\s+/ /g;
1259 $short_string = entity_decode($short_string);
1260 $long_string =~ s/\s+/ /g;
1261 $long_string = entity_decode($long_string);
1263 for my $lang (sort keys %po_files_by_lang)
1264 {
1265 my $default_translation = $translations{$lang, $default_string};
1266 my $short_translation = $translations{$lang, $short_string};
1267 my $long_translation = $translations{$lang, $long_string};
1269 next if (!$default_translation && !$short_translation &&
1270 !$long_translation);
1272 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1274 print OUTPUT "$default_spaces";
1276 if ($default_translation)
1277 {
1278 $default_translation = entity_encode($default_translation);
1279 print OUTPUT "<default>$default_translation</default>";
1280 }
1282 print OUTPUT "$short_spaces";
1284 if ($short_translation)
1285 {
1286 $short_translation = entity_encode($short_translation);
1287 print OUTPUT "<short>$short_translation</short>";
1288 }
1290 print OUTPUT "$long_spaces";
1292 if ($long_translation)
1293 {
1294 $long_translation = entity_encode($long_translation);
1295 print OUTPUT "<long>$long_translation</long>";
1296 }
1298 print OUTPUT "$locale_end_spaces</locale>";
1299 }
1300 }
1302 print OUTPUT $source;
1304 close OUTPUT;
1305 }
1307 sub rfc822deb_merge_translations
1308 {
1309 my %encodings = ();
1310 for my $lang (keys %po_files_by_lang) {
1311 $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1312 }
1314 my $source;
1316 $Text::Wrap::huge = 'overflow';
1317 $Text::Wrap::break = qr/\n|\s(?=\S)/;
1319 {
1320 local $/; # slurp mode
1321 open INPUT, "<$FILE" or die "can't open $FILE: $!";
1322 $source = <INPUT>;
1323 close INPUT;
1324 }
1326 open OUTPUT, ">${OUTFILE}" or die;
1327 binmode (OUTPUT) if $^O eq 'MSWin32';
1329 while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1330 {
1331 my $sep = $1;
1332 my $non_translated_line = $3.$4;
1333 my $string = $5;
1334 my $underscore = length($2);
1335 next if $underscore eq 0 && $non_translated_line =~ /^#/;
1336 # Remove [] dummy strings
1337 my $stripped = $string;
1338 $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1339 $stripped =~ s/\[\s[^\[\]]*\]$//;
1340 $non_translated_line .= $stripped;
1342 print OUTPUT $sep.$non_translated_line;
1344 if ($underscore)
1345 {
1346 my @str_list = rfc822deb_split($underscore, $string);
1348 for my $lang (sort keys %po_files_by_lang)
1349 {
1350 my $is_translated = 1;
1351 my $str_translated = '';
1352 my $first = 1;
1354 for my $str (@str_list)
1355 {
1356 my $translation = $translations{$lang, $str};
1358 if (!$translation)
1359 {
1360 $is_translated = 0;
1361 last;
1362 }
1364 # $translation may also contain [] dummy
1365 # strings, mostly to indicate an empty string
1366 $translation =~ s/\[\s[^\[\]]*\]$//;
1368 if ($first)
1369 {
1370 if ($underscore eq 2)
1371 {
1372 $str_translated .= $translation;
1373 }
1374 else
1375 {
1376 $str_translated .=
1377 Text::Tabs::expand($translation) .
1378 "\n";
1379 }
1380 }
1381 else
1382 {
1383 if ($underscore eq 2)
1384 {
1385 $str_translated .= ', ' . $translation;
1386 }
1387 else
1388 {
1389 $str_translated .= Text::Tabs::expand(
1390 Text::Wrap::wrap(' ', ' ', $translation)) .
1391 "\n .\n";
1392 }
1393 }
1394 $first = 0;
1396 # To fix some problems with Text::Wrap::wrap
1397 $str_translated =~ s/(\n )+\n/\n .\n/g;
1398 }
1399 next unless $is_translated;
1401 $str_translated =~ s/\n \.\n$//;
1402 $str_translated =~ s/\s+$//;
1404 $_ = $non_translated_line;
1405 s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1406 print OUTPUT;
1407 }
1408 }
1409 }
1410 print OUTPUT "\n";
1412 close OUTPUT;
1413 close INPUT;
1414 }
1416 sub rfc822deb_split
1417 {
1418 # Debian defines a special way to deal with rfc822-style files:
1419 # when a value contain newlines, it consists of
1420 # 1. a short form (first line)
1421 # 2. a long description, all lines begin with a space,
1422 # and paragraphs are separated by a single dot on a line
1423 # This routine returns an array of all paragraphs, and reformat
1424 # them.
1425 # When first argument is 2, the string is a comma separated list of
1426 # values.
1427 my $type = shift;
1428 my $text = shift;
1429 $text =~ s/^[ \t]//mg;
1430 return (split(/, */, $text, 0)) if $type ne 1;
1431 return ($text) if $text !~ /\n/;
1433 $text =~ s/([^\n]*)\n//;
1434 my @list = ($1);
1435 my $str = '';
1437 for my $line (split (/\n/, $text))
1438 {
1439 chomp $line;
1440 if ($line =~ /^\.\s*$/)
1441 {
1442 # New paragraph
1443 $str =~ s/\s*$//;
1444 push(@list, $str);
1445 $str = '';
1446 }
1447 elsif ($line =~ /^\s/)
1448 {
1449 # Line which must not be reformatted
1450 $str .= "\n" if length ($str) && $str !~ /\n$/;
1451 $line =~ s/\s+$//;
1452 $str .= $line."\n";
1453 }
1454 else
1455 {
1456 # Continuation line, remove newline
1457 $str .= " " if length ($str) && $str !~ /\n$/;
1458 $str .= $line;
1459 }
1460 }
1462 $str =~ s/\s*$//;
1463 push(@list, $str) if length ($str);
1465 return @list;
1466 }
1468 sub quoted_translation
1469 {
1470 my ($xml_mode, $lang, $string) = @_;
1472 $string = entity_decode($string) if $xml_mode;
1473 $string =~ s/\\\"/\"/g;
1475 my $translation = $translations{$lang, $string};
1476 $translation = $string if !$translation;
1477 $translation = entity_encode($translation) if $xml_mode;
1478 $translation =~ s/\"/\\\"/g;
1479 return $translation
1480 }
1482 sub quoted_merge_translations
1483 {
1484 my ($xml_mode) = @_;
1486 if (!$MULTIPLE_OUTPUT) {
1487 print "Quoted only supports Multiple Output.\n";
1488 exit(1);
1489 }
1491 for my $lang (sort keys %po_files_by_lang) {
1492 if ( ! -d $lang ) {
1493 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
1494 }
1495 open INPUT, "<${FILE}" or die;
1496 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1497 binmode (OUTPUT) if $^O eq 'MSWin32';
1498 while (<INPUT>)
1499 {
1500 s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . "ed_translation($xml_mode, $lang, $1) . "\""/ge;
1501 print OUTPUT;
1502 }
1503 close OUTPUT;
1504 close INPUT;
1505 }
1506 }