summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 52c4073)
raw | patch | inline | side by side (parent: 52c4073)
author | jhermann <jhermann@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Wed, 19 Dec 2001 00:32:13 +0000 (00:32 +0000) | ||
committer | jhermann <jhermann@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Wed, 19 Dec 2001 00:32:13 +0000 (00:32 +0000) |
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@477 57a73879-2fb5-44c3-a270-3262357dd7e2
tools/pygettext.py | patch | blob | history |
diff --git a/tools/pygettext.py b/tools/pygettext.py
index 40509673ace8b5c0319de19d1b2a2e98cc6981f7..97ba489e7145524c455bd64b44574969df931f40 100644 (file)
--- a/tools/pygettext.py
+++ b/tools/pygettext.py
#! /usr/bin/env python
-# Originally written by Barry Warsaw <bwarsaw@python.org>
+# Originally written by Barry Warsaw <barry@zope.com>
#
-# minimally patched to make it even more xgettext compatible
+# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
-# 2001-11-21 Jürgen Hermann <jh@web.de>
-# Checks that _() only contains string literals added, and
+# 2001-12-18 Jürgen Hermann <jh@web.de>
+# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
+# Made docstring fit in 80 chars wide displays using pydoc.
#
# for selftesting
except ImportError:
_ = lambda s: s
-
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
-internationalization of C programs. Most of these tools are independent of
-the programming language and can be used from within Python programs. Martin
-von Loewis' work[1] helps considerably in this regard.
+internationalization of C programs. Most of these tools are independent of
+the programming language and can be used from within Python programs.
+Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++). Python introduces
-a few wrinkles, such as dual quoting characters, triple quoted strings, and
-raw strings. xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan Python
-source code, generating .pot files identical to what GNU xgettext[2] generates
-for C and C++ code. From there, the standard GNU tools can be used.
-
-A word about marking Python strings as candidates for translation. GNU
-xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
-gettext_noop. But those can be a lot of text to include all over your code.
-C and C++ have a trick: they use the C preprocessor. Most internationalized C
-source includes a #define for gettext() to _() so that what has to be written
-in the source is much less. Thus these are both translatable strings:
+looking for message strings, but it groks only C (or C++). Python
+introduces a few wrinkles, such as dual quoting characters, triple quoted
+strings, and raw strings. xgettext understands none of this.
+
+Enter pygettext, which uses Python's standard tokenize module to scan
+Python source code, generating .pot files identical to what GNU xgettext[2]
+generates for C and C++ code. From there, the standard GNU tools can be
+used.
+
+A word about marking Python strings as candidates for translation. GNU
+xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
+and gettext_noop. But those can be a lot of text to include all over your
+code. C and C++ have a trick: they use the C preprocessor. Most
+internationalized C source includes a #define for gettext() to _() so that
+what has to be written in the source is much less. Thus these are both
+translatable strings:
gettext("Translatable String")
_("Translatable String")
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html
-NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible. However some options are still missing or are not fully
-implemented. Also, xgettext's use of command line switches with option
-arguments is broken, and in these cases, pygettext just defines additional
-switches.
+NOTE: pygettext attempts to be option and feature compatible with GNU
+xgettext where ever possible. However some options are still missing or are
+not fully implemented. Also, xgettext's use of command line switches with
+option arguments is broken, and in these cases, pygettext just defines
+additional switches.
Usage: pygettext [options] inputfile ...
-a
--extract-all
- Extract all strings
+ Extract all strings.
-d name
--default-domain=name
- Rename the default output file from messages.pot to name.pot
+ Rename the default output file from messages.pot to name.pot.
-E
--escape
- replace non-ASCII characters with octal escape sequences.
+ Replace non-ASCII characters with octal escape sequences.
+
+ -D
+ --docstrings
+ Extract module, class, method, and function docstrings. These do
+ not need to be wrapped in _() markers, and in fact cannot be for
+ Python to consider them docstrings. (See also the -X option).
-h
--help
- print this help message and exit
+ Print this help message and exit.
-k word
--keyword=word
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
+ -o filename
+ --output=filename
+ Rename the default output file from messages.pot to filename. If
+ filename is `-' then the output is sent to standard out.
+
+ -p dir
+ --output-dir=dir
+ Output files will be placed in directory dir.
+
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
The style name is case insensitive. GNU style is the default.
- -o filename
- --output=filename
- Rename the default output file from messages.pot to filename. If
- filename is `-' then the output is sent to standard out.
-
- -p dir
- --output-dir=dir
- Output files will be placed in directory dir.
-
-v
--verbose
Print the names of the files being processed.
extracted from the input files. Each string to be excluded must
appear on a line by itself in the file.
-If `inputfile' is -, standard input is read.
+ -X filename
+ --no-docstrings=filename
+ Specify a file that contains a list of files (one per line) that
+ should not have their docstrings extracted. This is only useful in
+ conjunction with the -D option above.
+If `inputfile' is -, standard input is read.
""")
import os
import getopt
import token
import tokenize
+import operator
-__version__ = '1.1'
+__version__ = '1.5'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
\f
-# The normal pot-file header. msgmerge and EMACS' po-mode work better if
-# it's there.
+# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
+# there.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
-"PO-Revision-Date: %(time)s\\n"
+"POT-Creation-Date: %(time)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
\f
def usage(code, msg=''):
- print __doc__ % globals()
+ print >> sys.stderr, __doc__ % globals()
if msg:
- print msg
+ print >> sys.stderr, msg
sys.exit(code)
return s
\f
-
def containsAny(str, set):
""" Check whether 'str' contains ANY of the chars in 'set'
"""
self.__state = self.__waiting
self.__data = []
self.__lineno = -1
+ self.__freshmodule = 1
+ self.__curfile = None
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
+## import token
+## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
- if ttype == tokenize.NAME and tstring in self.__options.keywords:
+ opts = self.__options
+ # Do docstring extractions, if enabled
+ if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
+ # module docstring?
+ if self.__freshmodule:
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__freshmodule = 0
+ elif ttype not in (tokenize.COMMENT, tokenize.NL):
+ self.__freshmodule = 0
+ return
+ # class docstring?
+ if ttype == tokenize.NAME and tstring in ('class', 'def'):
+ self.__state = self.__suiteseen
+ return
+ if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen
+ def __suiteseen(self, ttype, tstring, lineno):
+ # ignore anything until we see the colon
+ if ttype == tokenize.OP and tstring == ':':
+ self.__state = self.__suitedocstring
+
+ def __suitedocstring(self, ttype, tstring, lineno):
+ # ignore any intervening noise
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__state = self.__waiting
+ elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
+ tokenize.COMMENT):
+ # there was no class docstring
+ self.__state = self.__waiting
+
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
self.__data = []
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
- msg = EMPTYSTRING.join(self.__data)
- if not msg in self.__options.toexclude:
- entry = (self.__curfile, self.__lineno)
- linenos = self.__messages.get(msg)
- if linenos is None:
- self.__messages[msg] = [entry]
- else:
- linenos.append(entry)
+ self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
self.__state = self.__waiting
+ def __addentry(self, msg, lineno=None, isdocstring=0):
+ if lineno is None:
+ lineno = self.__lineno
+ if not msg in self.__options.toexclude:
+ entry = (self.__curfile, lineno)
+ self.__messages.setdefault(msg, {})[entry] = isdocstring
+
def set_filename(self, filename):
self.__curfile = filename
+ self.__freshmodule = 1
def write(self, fp):
options = self.__options
timestamp = time.ctime(time.time())
- # common header
- try:
- sys.stdout = fp
- # The time stamp in the header doesn't have the same format
- # as that generated by xgettext...
- print pot_header % {'time': timestamp, 'version': __version__}
- for k, v in self.__messages.items():
+ # The time stamp in the header doesn't have the same format as that
+ # generated by xgettext...
+ print >> fp, pot_header % {'time': timestamp, 'version': __version__}
+ # Sort the entries. First sort each particular entry's keys, then
+ # sort all the entries by their first item.
+ reverse = {}
+ for k, v in self.__messages.items():
+ keys = v.keys()
+ keys.sort()
+ reverse.setdefault(tuple(keys), []).append((k, v))
+ rkeys = reverse.keys()
+ rkeys.sort()
+ for rkey in rkeys:
+ rentries = reverse[rkey]
+ rentries.sort()
+ for k, v in rentries:
+ isdocstring = 0
+ # If the entry was gleaned out of a docstring, then add a
+ # comment stating so. This is to aid translators who may wish
+ # to skip translating some unimportant docstrings.
+ if reduce(operator.__add__, v.values()):
+ isdocstring = 1
+ # k is the message string, v is a dictionary-set of (filename,
+ # lineno) tuples. We want to sort the entries in v first by
+ # file name and then by line number.
+ v = v.keys()
+ v.sort()
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
- print _('# File: %(filename)s, line: %(lineno)d') % d
+ print >>fp, _(
+ '# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceeds 'options.width'
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
- print locline
+ print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
- print locline
- # TBD: sorting, normalizing
- print 'msgid', normalize(k)
- print 'msgstr ""\n'
- finally:
- sys.stdout = sys.__stdout__
+ print >> fp, locline
+ if isdocstring:
+ print >> fp, '#, docstring'
+ print >> fp, 'msgid', normalize(k)
+ print >> fp, 'msgstr ""\n'
+
\f
def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
- 'ad:Ehk:Kno:p:S:Vvw:x:',
- ['extract-all', 'default-domain', 'escape', 'help',
+ 'ad:DEhk:Kno:p:S:Vvw:x:X:',
+ ['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
+ 'docstrings', 'no-docstrings',
])
except getopt.error, msg:
usage(1, msg)
verbose = 0
width = 78
excludefilename = ''
+ docstrings = 0
+ nodocstrings = {}
options = Options()
locations = {'gnu' : options.GNU,
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
+ elif opt in ('-D', '--docstrings'):
+ options.docstrings = 1
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):
usage(1, _('--width argument must be an integer: %s') % arg)
elif opt in ('-x', '--exclude-file'):
options.excludefilename = arg
+ elif opt in ('-X', '--no-docstrings'):
+ fp = open(arg)
+ try:
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ options.nodocstrings[line[:-1]] = 1
+ finally:
+ fp.close()
# calculate escapes
make_escapes(options.escape)
options.toexclude = fp.readlines()
fp.close()
except IOError:
- sys.stderr.write(_("Can't read --exclude-file: %s") %
- options.excludefilename)
+ print >> sys.stderr, _(
+ "Can't read --exclude-file: %s") % options.excludefilename
sys.exit(1)
else:
options.toexclude = []
# resolve args to module lists
expanded = []
for arg in args:
- expanded.extend(getFilesForName(arg))
+ if arg == '-':
+ expanded.append(arg)
+ else:
+ expanded.extend(getFilesForName(arg))
args = expanded
# slurp through all the files
closep = 1
try:
eater.set_filename(filename)
- tokenize.tokenize(fp.readline, eater)
+ try:
+ tokenize.tokenize(fp.readline, eater)
+ except tokenize.TokenError, e:
+ print >> sys.stderr, '%s: %s, line %d, column %d' % (
+ e[0], filename, e[1][0], e[1][1])
finally:
if closep:
fp.close()