1 #! /usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
4 # Plural forms support added by alexander smishlajev <alex@tycobka.lv>
6 """Generate binary message catalog from textual translation description.
8 This program converts a textual Uniforum-style message catalog (.po file) into
9 a binary GNU catalog (.mo file). This is essentially the same function as the
10 GNU msgfmt program, however, it is a simpler implementation.
12 Usage: msgfmt.py [OPTIONS] filename.po
14 Options:
15 -o file
16 --output-file=file
17 Specify the output file to write to. If omitted, output will go to a
18 file named filename.mo (based off the input file name).
20 -h
21 --help
22 Print this message and exit.
24 -V
25 --version
26 Display version information and exit.
27 """
29 import sys
30 import os
31 import getopt
32 import struct
33 import array
# Version string reported by the -V / --version option.
__version__ = '1.1'

# Catalog under construction: maps each message id to its translation.
# Reset by make(), filled by add() and serialised by generate().
MESSAGES = dict()
40 \f
def usage(code, msg=''):
    """Print the module docstring, then *msg* if given, and exit.

    code -- exit status handed to sys.exit().
    msg  -- extra diagnostic written after the help text; skipped when it
            is empty.  Anything convertible with str() is accepted, so an
            exception instance may be passed directly.

    Everything goes to stderr.  sys.stderr.write() is used instead of the
    ``print >>`` statement so the function runs unchanged on Python 2 and 3.
    """
    sys.stderr.write('%s\n' % (__doc__,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
48 \f
def add(id, str, fuzzy):
    """Record one translation in MESSAGES unless it has to be left out.

    id    -- message key.
    str   -- translated text for that key.
    fuzzy -- true when the entry was marked fuzzy.

    Nothing is stored for a fuzzy entry, for an empty translation, or for
    a translation that begins with a NUL character.
    """
    if fuzzy:
        return
    if not str or str.startswith('\0'):
        return
    MESSAGES[id] = str
56 \f
def generate():
    """Return the binary .mo image built from the MESSAGES dictionary.

    Layout, in order:
      * a header of seven 32-bit integers (magic, version, number of
        entries, offset of the key index, offset of the value index, size
        and offset of the hash table -- both 0, no hash table is written);
      * the key index, one (length, file offset) pair per message;
      * the value index, in the same form;
      * the keys, each NUL terminated;
      * the values, each NUL terminated.

    Integers are packed with struct's native byte order and size.  The keys
    and values are concatenated with the packed header, so they have to be
    byte strings.
    """
    # the keys are sorted in the .mo file
    keys = list(MESSAGES.keys())
    keys.sort()

    # Each string is NUL terminated; the NUL does not count into the size
    # recorded in the index.  The pieces are collected in lists and joined
    # once instead of growing two strings with += inside the loop.
    id_parts = [key + '\0' for key in keys]
    str_parts = [MESSAGES[key] + '\0' for key in keys]
    ids = ''.join(id_parts)
    strs = ''.join(str_parts)

    # The header is 7 32-bit unsigned integers. We don't use hash tables, so
    # the keys start right after the index tables (two integers per key and
    # two per value: 16 bytes for every entry).
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    kpos = keystart
    vpos = valuestart
    for id_part, str_part in zip(id_parts, str_parts):
        koffsets += [len(id_part) - 1, kpos]
        voffsets += [len(str_part) - 1, vpos]
        kpos += len(id_part)
        vpos += len(str_part)
    offsets = koffsets + voffsets

    header = struct.pack("Iiiiiii",
                         0x950412de,         # Magic
                         0,                  # Version
                         len(keys),          # # of entries
                         7*4,                # start of key index
                         7*4+len(keys)*8,    # start of value index
                         0, 0)               # size and offset of hash table
    # Native "i" items pack exactly like array.array("i", ...), without
    # depending on the array method whose name differs between versions.
    index = struct.pack("%di" % len(offsets), *offsets)
    return header + index + ids + strs
99 \f
def make(filename, outfile):
    """Compile one .po catalog into a binary .mo file.

    filename -- path of the catalog source; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the file to write, or None to reuse the input name
                with its extension replaced by '.mo'.

    The module-level MESSAGES dictionary is reset, filled through add() and
    serialised with generate().  The process exits with status 1 when the
    input cannot be read or is malformed; a failure to write the output is
    only reported on stderr.
    """
    global MESSAGES
    ID = 1
    STR = 2
    MESSAGES = {}

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        source = open(infile)
        try:
            lines = source.readlines()
        finally:
            # close the handle explicitly instead of relying on garbage
            # collection
            source.close()
    except IOError:
        sys.stderr.write('%s\n' % (sys.exc_info()[1],))
        sys.exit(1)

    # remove UTF-8 Byte Order Mark, if any.
    # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
    # An empty file has no first line to inspect.
    if lines and lines[0].startswith('\xEF\xBB\xBF'):
        lines[0] = lines[0][3:]

    section = None
    fuzzy = 0

    # Parse the catalog
    lno = 0
    for line in lines:
        lno += 1
        # startswith() rather than line[0]: the first line can be empty
        # once a byte order mark has been stripped from it.
        if line.startswith('#'):
            # If we get a comment line after a msgstr, this is a new entry
            if section == STR:
                add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if line[:2] == '#,' and line.find('fuzzy') >= 0:
                fuzzy = 1
            # Skip comments
            continue
        # Start of msgid_plural section, separate from singular form with \0
        if line.startswith('msgid_plural'):
            if section != ID:
                # without a msgid just before it there is no key to extend
                sys.stderr.write(
                    'msgid_plural not preceded by msgid on %s:%d\n'
                    % (infile, lno))
                sys.exit(1)
            msgid += '\0'
            line = line[12:]
        # Now we are in a msgid section, output previous section
        elif line.startswith('msgid'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment line came between the two entries, so the
                # flag belongs to the entry just stored; it must not leak
                # into the one that starts here.
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = ''
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            line = line[6:]
            # Check for plural forms
            if line.startswith('['):
                # Separate plural forms with \0
                if not line.startswith('[0]'):
                    msgstr += '\0'
                # Ignore the index - must come in sequence
                line = line[line.index(']') + 1:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # XXX: Does this always follow Python escape semantics?
        # SECURITY NOTE(review): eval() executes whatever expression the
        # catalog contains, so only trusted .po files may be compiled.
        # Consider a literal-only parser such as ast.literal_eval.
        text = eval(line)
        if section == ID:
            msgid += text
        elif section == STR:
            msgstr += text
        else:
            sys.stderr.write('Syntax error on %s:%d before:\n'
                             % (infile, lno))
            sys.stderr.write('%s\n' % (text,))
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        target = open(outfile, "wb")
        try:
            target.write(output)
        finally:
            target.close()
    except IOError:
        sys.stderr.write('%s\n' % (sys.exc_info()[1],))
193 \f
def main():
    """Command-line entry point: parse sys.argv and compile each catalog.

    Options handled: -h/--help (help text, exit 0), -V/--version (version
    on stderr, exit 0) and -o/--output-file (output path).  An unknown
    option prints the help text and exits with status 1 through usage().
    Every remaining argument is passed to make() together with the chosen
    output path.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error:
        # sys.exc_info() keeps the handler valid on both Python 2 and 3
        usage(1, sys.exc_info()[1])

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write('msgfmt.py %s\n' % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        # NOTE(review): returning here leaves the exit status at 0 even
        # though nothing was compiled -- confirm that this is intended.
        return

    # NOTE(review): the same outfile is handed to every make() call, so
    # with -o and several inputs each catalog overwrites the previous one.
    for filename in args:
        make(filename, outfile)
# Run the command-line interface only when executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
224 # vim: set et sts=4 sw=4 :