1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
"""Create string and word counts for supported localization files including:
XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc.

See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and
usage instructions.
"""
27
28 from optparse import OptionParser
29 import os
30 import sys
31
32 from translate.storage import factory
33 from translate.storage import statsdb
34
35
# Identifiers for the available output styles; range(4) binds each name to a
# distinct integer (style_full is 0, so it is falsy when used as a flag).
style_full, style_csv, style_short_strings, style_short_words = range(4)


# Style used by main() when no style option is given on the command line.
default_style = style_full
40
41
    """This is the previous implementation of calcstats() and is left for
    comparison and debugging purposes."""
45
46 try:
47 store = factory.getobject(filename)
48 except ValueError, e:
49 print str(e)
50 return {}
51 units = filter(lambda unit: not unit.isheader(), store.units)
52 translated = translatedmessages(units)
53 fuzzy = fuzzymessages(units)
54 review = filter(lambda unit: unit.isreview(), units)
55 untranslated = untranslatedmessages(units)
56 wordcounts = dict(map(lambda unit: (unit, statsdb.wordsinunit(unit)), units))
57 sourcewords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][0], elementlist))
58 targetwords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][1], elementlist))
59 stats = {}
60
61
62 stats["translated"] = len(translated)
63 stats["fuzzy"] = len(fuzzy)
64 stats["untranslated"] = len(untranslated)
65 stats["review"] = len(review)
66 stats["total"] = stats["translated"] + stats["fuzzy"] + stats["untranslated"]
67
68
69 stats["translatedsourcewords"] = sourcewords(translated)
70 stats["translatedtargetwords"] = targetwords(translated)
71 stats["fuzzysourcewords"] = sourcewords(fuzzy)
72 stats["untranslatedsourcewords"] = sourcewords(untranslated)
73 stats["reviewsourcewords"] = sourcewords(review)
74 stats["totalsourcewords"] = stats["translatedsourcewords"] + \
75 stats["fuzzysourcewords"] + \
76 stats["untranslatedsourcewords"]
77 return stats
78
79
83
84
86 """
87 Print summary for a .po file in specified format.
88
89 @param title: name of .po file
90 @param stats: array with translation statistics for the file specified
91 @param indent: indentation of the 2nd column (length of longest filename)
92 @param incomplete_only: omit fully translated files
93 @type incomplete_only: Boolean
94 @rtype: Boolean
95 @return: 1 if counting incomplete files (incomplete_only=True) and the
96 file is completely translated, 0 otherwise
97 """
98
99 def percent(denominator, devisor):
100 if devisor == 0:
101 return 0
102 else:
103 return denominator * 100 / devisor
104
105 if incomplete_only and (stats["total"] == stats["translated"]):
106 return 1
107
108 if (style == style_csv):
109 print "%s, " % title,
110 print "%d, %d, %d," % (stats["translated"], stats["translatedsourcewords"], stats["translatedtargetwords"]),
111 print "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
112 print "%d, %d," % (stats["untranslated"], stats["untranslatedsourcewords"]),
113 print "%d, %d" % (stats["total"], stats["totalsourcewords"]),
114 if stats["review"] > 0:
115 print ", %d, %d" % (stats["review"], stats["reviewsourdcewords"]),
116 print
117 elif (style == style_short_strings):
118 spaces = " " * (indent - len(title))
119 print "%s%s strings: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (title, spaces, \
120 stats["total"], stats["translated"], stats["fuzzy"], stats["untranslated"], \
121 percent(stats["translated"], stats["total"]), \
122 percent(stats["fuzzy"], stats["total"]), \
123 percent(stats["untranslated"], stats["total"]))
124 elif (style == style_short_words):
125 spaces = " " * (indent - len(title))
126 print "%s%s source words: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (title, spaces, \
127 stats["totalsourcewords"], stats["translatedsourcewords"], stats["fuzzysourcewords"], stats["untranslatedsourcewords"], \
128 percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \
129 percent(stats["fuzzysourcewords"], stats["totalsourcewords"]), \
130 percent(stats["untranslatedsourcewords"], stats["totalsourcewords"]))
131 else:
132 print title
133 print "type strings words (source) words (translation)"
134 print "translated: %5d (%3d%%) %10d (%3d%%) %15d" % \
135 (stats["translated"], \
136 percent(stats["translated"], stats["total"]), \
137 stats["translatedsourcewords"], \
138 percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \
139 stats["translatedtargetwords"])
140 print "fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % \
141 (stats["fuzzy"], \
142 percent(stats["fuzzy"], stats["total"]), \
143 stats["fuzzysourcewords"], \
144 percent(stats["fuzzysourcewords"], stats["totalsourcewords"]))
145 print "untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % \
146 (stats["untranslated"], \
147 percent(stats["untranslated"], stats["total"]), \
148 stats["untranslatedsourcewords"], \
149 percent(stats["untranslatedsourcewords"], stats["totalsourcewords"]))
150 print "Total: %5d %17d %22d" % \
151 (stats["total"], \
152 stats["totalsourcewords"], \
153 stats["translatedtargetwords"])
154 if "extended" in stats:
155 print ""
156 for state, e_stats in stats["extended"].iteritems():
157 print "%s: %5d (%3d%%) %10d (%3d%%) %15d" % (
158 state, e_stats["units"], percent(e_stats["units"], stats["total"]),
159 e_stats["sourcewords"], percent(e_stats["sourcewords"], stats["totalsourcewords"]),
160 e_stats["targetwords"])
161
162 if stats["review"] > 0:
163 print "review: %5d %17d n/a" % \
164 (stats["review"], stats["reviewsourcewords"])
165 print
166 return 0
167
168
171
172
175
176
179
180
182
184 self.totals = {}
185 self.filecount = 0
186 self.longestfilename = 0
187 self.style = style
188 self.incomplete_only = incomplete_only
189 self.complete_count = 0
190
191 if (self.style == style_csv):
192 print "Filename, Translated Messages, Translated Source Words, Translated \
193 Target Words, Fuzzy Messages, Fuzzy Source Words, Untranslated Messages, \
194 Untranslated Source Words, Total Message, Total Source Words, \
195 Review Messages, Review Source Words"
196 if (self.style == style_short_strings or self.style == style_short_words):
197 for filename in filenames:
198 if (len(filename) > self.longestfilename):
199 self.longestfilename = len(filename)
200 for filename in filenames:
201 if not os.path.exists(filename):
202 print >> sys.stderr, "cannot process %s: does not exist" % filename
203 continue
204 elif os.path.isdir(filename):
205 self.handledir(filename)
206 else:
207 self.handlefile(filename)
208 if self.filecount > 1 and (self.style == style_full):
209 if self.incomplete_only:
210 summarize("TOTAL (incomplete only):", self.totals,
211 incomplete_only=True)
212 print "File count (incomplete): %5d" % (self.filecount - self.complete_count)
213 else:
214 summarize("TOTAL:", self.totals, incomplete_only=False)
215 print "File count: %5d" % (self.filecount)
216 print
217
219 """Update self.totals with the statistics in stats."""
220 for key in stats.keys():
221 if key == "extended":
222
223 continue
224 if not key in self.totals:
225 self.totals[key] = 0
226 self.totals[key] += stats[key]
227
229 try:
230 stats = calcstats(filename)
231 self.updatetotals(stats)
232 self.complete_count += summarize(filename, stats, self.style,
233 self.longestfilename,
234 self.incomplete_only)
235 self.filecount += 1
236 except:
237 print >> sys.stderr, sys.exc_info()[1]
238
240 for filename in filenames:
241 pathname = os.path.join(dirname, filename)
242 if os.path.isdir(pathname):
243 self.handledir(pathname)
244 else:
245 self.handlefile(pathname)
246
248 path, name = os.path.split(dirname)
249 if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]:
250 return
251 entries = os.listdir(dirname)
252 self.handlefiles(dirname, entries)
253
254
def main():
    """Parse the command line and print statistics for the named files.

    At most one of --full, --csv, --short/--short-strings and --short-words
    may be given; without any of them default_style is used. The positional
    arguments are passed to summarizer as the files/directories to process.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    name is taken from the ``main()`` call in the script entry guard.
    """
    parser = OptionParser(usage="usage: %prog [options] po-files")
    parser.add_option("--incomplete", action="store_true", default=False,
                      dest="incomplete_only",
                      help="skip 100% translated files.")
    # The style switches are plain boolean flags; the style constant itself
    # is chosen further down, once mutual exclusion has been checked.
    parser.add_option("--full", action="store_true", default=False,
                      dest="style_full",
                      help="(default) statistics in full, verbose format")
    parser.add_option("--csv", action="store_true", default=False,
                      dest="style_csv",
                      help="statistics in CSV format")
    parser.add_option("--short", action="store_true", default=False,
                      dest="style_short_strings",
                      help="same as --short-strings")
    parser.add_option("--short-strings", action="store_true", default=False,
                      dest="style_short_strings",
                      help="statistics of strings in short format - one line per file")
    parser.add_option("--short-words", action="store_true", default=False,
                      dest="style_short_words",
                      help="statistics of words in short format - one line per file")

    (options, args) = parser.parse_args()

    requested = [flag for flag in (options.style_full,
                                   options.style_csv,
                                   options.style_short_strings,
                                   options.style_short_words) if flag]
    if len(requested) > 1:
        # parser.error() prints the message and exits with status 2 itself.
        parser.error("options --full, --csv, --short-strings and --short-words are mutually exclusive")

    style = default_style
    if options.style_csv:
        style = style_csv
    elif options.style_full:
        style = style_full
    elif options.style_short_strings:
        style = style_short_strings
    elif options.style_short_words:
        style = style_short_words

    # psyco is an optional accelerator; run without it when it is unavailable
    # or fails to initialise.
    try:
        import psyco
        psyco.full()
    except Exception:
        pass

    summarizer(args, style, options.incomplete_only)
308
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
311