roundup/msgfmt.py

   1 #! /usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
   4 # Plural forms support added by alexander smishlajev <alex@tycobka.lv>
   5
   6 """Generate binary message catalog from textual translation description.
   7
   8 This program converts a textual Uniforum-style message catalog (.po file) into
   9 a binary GNU catalog (.mo file).  This is essentially the same function as the
  10 GNU msgfmt program, however, it is a simpler implementation.
  11
  12 Usage: msgfmt.py [OPTIONS] filename.po
  13
  14 Options:
  15     -o file
  16     --output-file=file
  17         Specify the output file to write to.  If omitted, output will go to a
  18         file named filename.mo (based off the input file name).
  19
  20     -h
  21     --help
  22         Print this message and exit.
  23
  24     -V
  25     --version
  26         Display version information and exit.
  27 """
  28
  29 import sys
  30 import os
  31 import getopt
  32 import struct
  33 import array
  34
  35 __version__ = "1.1"
  36
  37 MESSAGES = {}
  38
  39
  40 \f
  41 def usage(code, msg=''):
  42     print >> sys.stderr, __doc__
  43     if msg:
  44         print >> sys.stderr, msg
  45     sys.exit(code)
  46
  47
  48 \f
  49 def add(id, str, fuzzy):
  50     "Add a non-fuzzy translation to the dictionary."
  51     global MESSAGES
  52     if not fuzzy and str and not str.startswith('\0'):
  53         MESSAGES[id] = str
  54
  55
  56 \f
  57 def generate():
  58     "Return the generated output."
  59     global MESSAGES
  60     keys = MESSAGES.keys()
  61     # the keys are sorted in the .mo file
  62     keys.sort()
  63     offsets = []
  64     ids = strs = ''
  65     for id in keys:
  66         # For each string, we need size and file offset.  Each string is NUL
  67         # terminated; the NUL does not count into the size.
  68         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
  69         ids += id + '\0'
  70         strs += MESSAGES[id] + '\0'
  71     output = ''
  72     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
  73     # the keys start right after the index tables.
  74     # translated string.
  75     keystart = 7*4+16*len(keys)
  76     # and the values start after the keys
  77     valuestart = keystart + len(ids)
  78     koffsets = []
  79     voffsets = []
  80     # The string table first has the list of keys, then the list of values.
  81     # Each entry has first the size of the string, then the file offset.
  82     for o1, l1, o2, l2 in offsets:
  83         koffsets += [l1, o1+keystart]
  84         voffsets += [l2, o2+valuestart]
  85     offsets = koffsets + voffsets
  86     output = struct.pack("Iiiiiii",
  87                          0x950412deL,       # Magic
  88                          0,                 # Version
  89                          len(keys),         # # of entries
  90                          7*4,               # start of key index
  91                          7*4+len(keys)*8,   # start of value index
  92                          0, 0)              # size and offset of hash table
  93     output += array.array("i", offsets).tostring()
  94     output += ids
  95     output += strs
  96     return output
  97
  98
  99 \f
 100 def make(filename, outfile):
 101     ID = 1
 102     STR = 2
 103     global MESSAGES
 104     MESSAGES = {}
 105
 106     # Compute .mo name from .po name and arguments
 107     if filename.endswith('.po'):
 108         infile = filename
 109     else:
 110         infile = filename + '.po'
 111     if outfile is None:
 112         outfile = os.path.splitext(infile)[0] + '.mo'
 113
 114     try:
 115         lines = open(infile).readlines()
 116     except IOError, msg:
 117         print >> sys.stderr, msg
 118         sys.exit(1)
 119
 120     # remove UTF-8 Byte Order Mark, if any.
 121     # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
 122     if lines[0].startswith('\xEF\xBB\xBF'):
 123         lines[0] = lines[0][3:]
 124
 125     section = None
 126     fuzzy = 0
 127
 128     # Parse the catalog
 129     lno = 0
 130     for l in lines:
 131         lno += 1
 132         # If we get a comment line after a msgstr, this is a new entry
 133         if l[0] == '#' and section == STR:
 134             add(msgid, msgstr, fuzzy)
 135             section = None
 136             fuzzy = 0
 137         # Record a fuzzy mark
 138         if l[:2] == '#,' and (l.find('fuzzy') >= 0):
 139             fuzzy = 1
 140         # Skip comments
 141         if l[0] == '#':
 142             continue
 143         # Start of msgid_plural section, separate from singular form with \0
 144         if l.startswith('msgid_plural'):
 145             msgid += '\0'
 146             l = l[12:]
 147         # Now we are in a msgid section, output previous section
 148         elif l.startswith('msgid'):
 149             if section == STR:
 150                 add(msgid, msgstr, fuzzy)
 151             section = ID
 152             l = l[5:]
 153             msgid = msgstr = ''
 154         # Now we are in a msgstr section
 155         elif l.startswith('msgstr'):
 156             section = STR
 157             l = l[6:]
 158             # Check for plural forms
 159             if l.startswith('['):
 160                 # Separate plural forms with \0
 161                 if not l.startswith('[0]'):
 162                     msgstr += '\0'
 163                 # Ignore the index - must come in sequence
 164                 l = l[l.index(']') + 1:]
 165         # Skip empty lines
 166         l = l.strip()
 167         if not l:
 168             continue
 169         # XXX: Does this always follow Python escape semantics?
 170         l = eval(l)
 171         if section == ID:
 172             msgid += l
 173         elif section == STR:
 174             msgstr += l
 175         else:
 176             print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
 177                   'before:'
 178             print >> sys.stderr, l
 179             sys.exit(1)
 180     # Add last entry
 181     if section == STR:
 182         add(msgid, msgstr, fuzzy)
 183
 184     # Compute output
 185     output = generate()
 186
 187     try:
 188         open(outfile,"wb").write(output)
 189     except IOError,msg:
 190         print >> sys.stderr, msg
 191
 192
 193 \f
 194 def main():
 195     try:
 196         opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
 197                                    ['help', 'version', 'output-file='])
 198     except getopt.error, msg:
 199         usage(1, msg)
 200
 201     outfile = None
 202     # parse options
 203     for opt, arg in opts:
 204         if opt in ('-h', '--help'):
 205             usage(0)
 206         elif opt in ('-V', '--version'):
 207             print >> sys.stderr, "msgfmt.py", __version__
 208             sys.exit(0)
 209         elif opt in ('-o', '--output-file'):
 210             outfile = arg
 211     # do it
 212     if not args:
 213         print >> sys.stderr, 'No input file given'
 214         print >> sys.stderr, "Try `msgfmt --help' for more information."
 215         return
 216
 217     for filename in args:
 218         make(filename, outfile)
 219
 220
 221 if __name__ == '__main__':
 222     main()
 223
 224 # vim: set et sts=4 sw=4 :