11#! /usr/bin/env python
22# Originally written by Barry Warsaw <[email protected] > 3+ #
4+ # minimally patched to make it even more xgettext compatible
5+ # by Peter Funk <[email protected] > 36
47"""pygettext -- Python equivalent of xgettext(1)
58
3538 [2] http://www.gnu.org/software/gettext/gettext.html
3639
3740NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
38- where ever possible.
41+ wherever possible. However, some options are still missing or are not fully
42+ implemented.
3943
4044Usage: pygettext [options] filename ...
4145
4549 --extract-all
4650 Extract all strings
4751
48- -d default-domain
49- --default-domain=default-domain
50- Rename the default output file from messages.pot to default-domain.pot
52+ -d name
53+ --default-domain=name
54+ Rename the default output file from messages.pot to name.pot
55+
56+ -E
57+ --escape
58+ Replace non-ASCII characters with octal escape sequences.
59+
60+ -h
61+ --help
62+ Print this help message and exit.
5163
5264 -k [word]
5365 --keyword[=word]
7385 If style is omitted, Gnu is used. The style name is case
7486 insensitive. By default, locations are included.
7587
88+ -o filename
89+ --output=filename
90+ Rename the default output file from messages.pot to filename.
91+
92+ -p dir
93+ --output-dir=dir
94+ Output files will be placed in directory dir.
95+
7696 -v
7797 --verbose
7898 Print the names of the files being processed.
7999
80- --help
81- -h
82- print this help message and exit
100+ -V
101+ --version
102+ Print the version of pygettext and exit.
103+
104+ -w columns
105+ --width=columns
106+ Set width of output to columns.
107+
108+ -x filename
109+ --exclude-file=filename
110+ Specify a file that contains a list of strings that are not to be
111+ extracted from the input files. Each string to be excluded must
112+ appear on a line by itself in the file.
83113
84114"""
85115
90120import getopt
91121import tokenize
92122
93- __version__ = '0.2 '
123+ __version__ = '1.0 '
94124
95125
96126
97127# for selftesting
98- def _ (s ): return s
128+ try :
129+ import fintl
130+ _ = fintl .gettext
131+ except ImportError :
132+ def _ (s ): return s
99133
100134
101135# The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
125159 print msg
126160 sys .exit (code )
127161
162+
128163
129164escapes = []
130- for i in range (256 ):
131- if i < 32 or i > 127 :
132- escapes .append ("\\ %03o" % i )
133- else :
134- escapes .append (chr (i ))
135165
136- escapes [ord ('\\ ' )] = '\\ \\ '
137- escapes [ord ('\t ' )] = '\\ t'
138- escapes [ord ('\r ' )] = '\\ r'
139- escapes [ord ('\n ' )] = '\\ n'
140- escapes [ord ('\" ' )] = '\\ "'
def make_escapes(pass_iso8859):
    """Build the global 256-entry table that escape() indexes by ordinal.

    Entry i holds the text emitted for the character whose ordinal is i:
    either the character itself or a backslash escape sequence.

    pass_iso8859 -- when true, characters whose ordinal modulo 128 lies in
        the printable range 32..126 are passed through unchanged.  This
        lets ISO-8859 characters 160..254 (an o-umlaut, for example)
        appear literally in a msgid instead of as an octal escape.  When
        false, every character outside 32..126 becomes a three-digit
        octal escape.

    The table is rebuilt from scratch on every call and the module-level
    name 'escapes' is rebound to it, so calling this function again
    replaces the previous table rather than appending to it.
    """
    global escapes
    if pass_iso8859:
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        # Only the range test uses the folded value.  The entry itself is
        # always built from the real ordinal i, so that chr(246) maps to
        # itself (not to chr(118)) and 128..159 get their own octal codes.
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    # Characters with a conventional C-style escape override the defaults.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes = table
183+
141184
142185def escape (s ):
186+ global escapes
143187 s = list (s )
144188 for i in range (len (s )):
145189 s [i ] = escapes [ord (s [i ])]
@@ -200,12 +244,13 @@ def __openseen(self, ttype, tstring, lineno):
200244 # were no strings inside _(), then just ignore this entry.
201245 if self .__data :
202246 msg = string .join (self .__data , '' )
203- entry = (self .__curfile , self .__lineno )
204- linenos = self .__messages .get (msg )
205- if linenos is None :
206- self .__messages [msg ] = [entry ]
207- else :
208- linenos .append (entry )
247+ if not msg in self .__options .toexclude :
248+ entry = (self .__curfile , self .__lineno )
249+ linenos = self .__messages .get (msg )
250+ if linenos is None :
251+ self .__messages [msg ] = [entry ]
252+ else :
253+ linenos .append (entry )
209254 self .__state = self .__waiting
210255 elif ttype == tokenize .STRING :
211256 self .__data .append (safe_eval (tstring ))
@@ -222,20 +267,30 @@ def write(self, fp):
222267 sys .stdout = fp
223268 # The time stamp in the header doesn't have the same format
224269 # as that generated by xgettext...
225- print pot_header % {'time' : timestamp , 'version' :__version__ }
270+ print pot_header % {'time' : timestamp , 'version' : __version__ }
226271 for k , v in self .__messages .items ():
227- for filename , lineno in v :
228- # location comments are different b/w Solaris and GNU
229- d = {'filename' : filename ,
230- 'lineno' : lineno }
231- if options .location == options .SOLARIS :
272+ # location comments are different b/w Solaris and GNU:
273+ if options .location == options .SOLARIS :
274+ for filename , lineno in v :
275+ d = {'filename' : filename , 'lineno' : lineno }
232276 print _ ('# File: %(filename)s, line: %(lineno)d' ) % d
233- elif options .location == options .GNU :
234- print _ ('#: %(filename)s:%(lineno)d' ) % d
277+ elif options .location == options .GNU :
278+ # fit as many locations as possible on one line, as long as the
279+ # resulting line length doesn't exceed 'options.width'
280+ locline = '#:'
281+ for filename , lineno in v :
282+ d = {'filename' : filename , 'lineno' : lineno }
283+ s = _ (' %(filename)s:%(lineno)d' ) % d
284+ if len (locline ) + len (s ) <= options .width :
285+ locline = locline + s
286+ else :
287+ print locline
288+ locline = "#:" + s
289+ if len (locline ) > 2 :
290+ print locline
235291 # TBD: sorting, normalizing
236292 print 'msgid' , normalize (k )
237- print 'msgstr ""'
238- print
293+ print 'msgstr ""\n '
239294 finally :
240295 sys .stdout = sys .__stdout__
241296
@@ -245,9 +300,11 @@ def main():
245300 try :
246301 opts , args = getopt .getopt (
247302 sys .argv [1 :],
248- 'k:d:n:hv' ,
249- ['keyword' , 'default-domain' , 'help' ,
250- 'add-location=' , 'no-location' , 'verbose' ])
303+ 'ad:Ehk:n:o:p:Vvw:x:' ,
304+ ['extract-all' , 'default-domain' , 'escape' , 'help' , 'keyword' ,
305+ 'add-location' , 'no-location' , 'output=' , 'output-dir=' ,
306+ 'verbose' , 'version' , 'width=' , 'exclude-file=' ,
307+ ])
251308 except getopt .error , msg :
252309 usage (1 , msg )
253310
@@ -257,10 +314,15 @@ class Options:
257314 GNU = 1
258315 SOLARIS = 2
259316 # defaults
317+ extractall = 0 # FIXME: currently this option has no effect at all.
318+ escape = 0
260319 keywords = []
320+ outpath = ''
261321 outfile = 'messages.pot'
262322 location = GNU
263323 verbose = 0
324+ width = 78
325+ excludefilename = ''
264326
265327 options = Options ()
266328 locations = {'gnu' : options .GNU ,
@@ -271,12 +333,16 @@ class Options:
271333 for opt , arg in opts :
272334 if opt in ('-h' , '--help' ):
273335 usage (0 )
336+ elif opt in ('-a' , '--extract-all' ):
337+ options .extractall = 1
338+ elif opt in ('-d' , '--default-domain' ):
339+ options .outfile = arg + '.pot'
340+ elif opt in ('-E' , '--escape' ):
341+ options .escape = 1
274342 elif opt in ('-k' , '--keyword' ):
275343 if arg is None :
276344 default_keywords = []
277345 options .keywords .append (arg )
278- elif opt in ('-d' , '--default-domain' ):
279- options .outfile = arg + '.pot'
280346 elif opt in ('-n' , '--add-location' ):
281347 if arg is None :
282348 arg = 'gnu'
@@ -287,12 +353,44 @@ class Options:
287353 usage (1 , _ ('Invalid value for --add-location: %(arg)s' ) % d )
288354 elif opt in ('--no-location' ,):
289355 options .location = 0
356+ elif opt in ('-o' , '--output' ):
357+ options .outfile = arg
358+ elif opt in ('-p' , '--output-dir' ):
359+ options .outpath = arg
290360 elif opt in ('-v' , '--verbose' ):
291361 options .verbose = 1
362+ elif opt in ('-V' , '--version' ):
363+ print _ ('pygettext.py (xgettext for Python) %s' ) % __version__
364+ sys .exit (0 )
365+ elif opt in ('-w' , '--width' ):
366+ try :
367+ options .width = int (arg )
368+ except ValueError :
369+ d = {'arg' :arg }
370+ usage (1 , _ ('Invalid value for --width: %(arg)s, must be int' )
371+ % d )
372+ elif opt in ('-x' , '--exclude-file' ):
373+ options .excludefilename = arg
374+
375+ # calculate escapes
376+ make_escapes (options .escapes )
292377
293378 # calculate all keywords
294379 options .keywords .extend (default_keywords )
295380
381+ # initialize list of strings to exclude
382+ if options .excludefilename :
383+ try :
384+ fp = open (options .excludefilename )
385+ options .toexclude = fp .readlines ()
386+ fp .close ()
387+ except IOError :
388+ sys .stderr .write (_ ("Can't read --exclude-file: %s" ) %
389+ options .excludefilename )
390+ sys .exit (1 )
391+ else :
392+ options .toexclude = []
393+
296394 # slurp through all the files
297395 eater = TokenEater (options )
298396 for filename in args :
@@ -303,6 +401,8 @@ class Options:
303401 tokenize .tokenize (fp .readline , eater )
304402 fp .close ()
305403
404+ if options .outpath :
405+ options .outfile = os .path .join (options .outpath , options .outfile )
306406 fp = open (options .outfile , 'w' )
307407 eater .write (fp )
308408 fp .close ()
0 commit comments