Package translate :: Package storage :: Module ts2
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.ts2

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008-2011 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Module for handling Qt linguist (.ts) files. 
 22   
 23  This will eventually replace the older ts.py which only supports the older 
 24  format. Until the converters have been updated to use this module, we retain 
 25  both. 
 26   
 27  U{TS file format 4.3<http://doc.trolltech.com/4.3/linguist-ts-file-format.html>}, 
 28  U{4.5<http://doc.trolltech.com/4.5/linguist-ts-file-format.html>}, 
 29  U{Example<http://svn.ez.no/svn/ezcomponents/trunk/Translation/docs/linguist-format.txt>}, 
 30  U{Plural forms<http://www.koders.com/cpp/fidE7B7E83C54B9036EB7FA0F27BC56BCCFC4B9DF34.aspx#L200>} 
 31   
 32  U{Specification of the valid variable entries <http://doc.trolltech.com/4.3/qstring.html#arg>}, 
 33  U{2 <http://doc.trolltech.com/4.3/qstring.html#arg-2>} 
 34  """ 
 35   
 36  from lxml import etree 
 37   
 38  from translate.lang import data 
 39  from translate.misc.multistring import multistring 
 40  from translate.storage import base, lisa 
 41  from translate.storage.placeables import general 
 42  from translate.storage.workflow import StateEnum as state 
 43   
 44  # TODO: handle translation types 
 45   
 46  NPLURALS = { 
 47  'jp': 1, 
 48  'en': 2, 
 49  'fr': 2, 
 50  'lv': 3, 
 51  'ga': 3, 
 52  'cs': 3, 
 53  'sk': 3, 
 54  'mk': 3, 
 55  'lt': 3, 
 56  'ru': 3, 
 57  'pl': 3, 
 58  'ro': 3, 
 59  'sl': 4, 
 60  'mt': 4, 
 61  'cy': 5, 
 62  'ar': 6, 
 63  } 
 64   
 65   
class tsunit(lisa.LISAunit):
    """A single <message> element of a Qt Linguist (.ts) file.

    The source text is read from the <source> child and the translation from
    the <translation> child. The optional "type" attribute of <translation>
    ("unfinished" or "obsolete") carries the state of the unit.
    """

    rootNode = "message"
    languageNode = "source"
    textNode = ""
    namespace = ''
    rich_parsers = general.parsers

    # State constants used by this format, expressed as workflow states.
    S_OBSOLETE = state.OBSOLETE
    S_UNTRANSLATED = state.EMPTY
    S_FUZZY = state.NEEDS_WORK
    S_TRANSLATED = state.UNREVIEWED

    statemap = {
        "obsolete": S_OBSOLETE,
        "unfinished": S_FUZZY,
        "": S_TRANSLATED,
        None: S_TRANSLATED,
    }
    """This maps the unit "type" attribute to state."""

    # NOTE(review): presumably the (lower, upper) bounds of the workflow state
    # range for each constant -- confirm against the base class that reads it.
    STATE = {
        S_OBSOLETE: (state.OBSOLETE, state.EMPTY),
        S_UNTRANSLATED: (state.EMPTY, state.NEEDS_WORK),
        S_FUZZY: (state.NEEDS_WORK, state.UNREVIEWED),
        S_TRANSLATED: (state.UNREVIEWED, state.MAX),
    }

    # Reverse of statemap (state -> "type" attribute value). Both "" and None
    # map to S_TRANSLATED; whichever wins is falsy, which _settype() treats
    # as "remove the attribute".
    statemap_r = dict((i[1], i[0]) for i in statemap.iteritems())

    def createlanguageNode(self, lang, text, purpose):
        """Returns an xml Element setup with given parameters.

        @param lang: currently unused (see the TODO below)
        @param text: text content for the new element
        @param purpose: tag name to create; "target" is translated to the
            format's "translation" tag
        """
        assert purpose
        if purpose == "target":
            purpose = "translation"
        langset = etree.Element(self.namespaced(purpose))
        #TODO: check language
        # lisa.setXMLlang(langset, lang)

        langset.text = text
        return langset

    def _getsourcenode(self):
        """Returns the <source> child element, or None if there is none."""
        return self.xmlelement.find(self.namespaced(self.languageNode))

    def _gettargetnode(self):
        """Returns the <translation> child element, or None if there is none."""
        return self.xmlelement.find(self.namespaced("translation"))

    def getlanguageNodes(self):
        """We override this to get source and target nodes.

        @return: list with the source and target elements that exist
        """
        candidates = [self._getsourcenode(), self._gettargetnode()]
        return [node for node in candidates if node is not None]

    def getsource(self):
        """Returns the source text of this unit.

        @return: a multistring for plural ("numerus") units, otherwise the
            text of <source>; None when the unit has no <source> element
        """
        # TODO: support <byte>. See bug 528.
        sourcenode = self._getsourcenode()
        if sourcenode is None:
            # getid() already expects that the source can be None, so report
            # a missing <source> that way instead of raising AttributeError.
            return None
        if self.hasplural():
            return multistring([sourcenode.text])
        return data.forceunicode(sourcenode.text)
    source = property(getsource, lisa.LISAunit.setsource)
    rich_source = property(base.TranslationUnit._get_rich_source,
                           base.TranslationUnit._set_rich_source)

    def settarget(self, text):
        """Sets the translation of this unit.

        @param text: a plain string, a list of strings or a multistring. More
            than one string (or an existing numerus="yes") results in one
            <numerusform> child per string.
        """
        # This is a fairly destructive implementation. Don't assume that this
        # is necessarily correct in all regards, but it does deal with a lot of
        # cases. It is hard to deal with plurals.
        #
        # Firstly deal with reinitialising to None or setting to identical
        # string.
        self._rich_target = None
        if self.gettarget() == text:
            return
        if isinstance(text, multistring):
            strings = text.strings
        elif isinstance(text, list):
            strings = text
        else:
            strings = [text]
        # gettarget() above created the <translation> node if it was missing,
        # so the lookup below finds an element.
        targetnode = self._gettargetnode()
        # clear() drops the attributes too, so carry "type" across it.
        target_type = targetnode.get("type")
        targetnode.clear()
        if target_type:
            targetnode.set("type", target_type)
        if self.hasplural() or len(strings) > 1:
            self.xmlelement.set("numerus", "yes")
            for string in strings:
                numerus = etree.SubElement(targetnode,
                                           self.namespaced("numerusform"))
                numerus.text = data.forceunicode(string) or u""
                # manual, nasty pretty printing. See bug 1420.
                # NOTE(review): the whitespace inside this literal (and the
                # one below) may have been collapsed by the listing this code
                # was recovered from -- confirm against upstream.
                numerus.tail = u"\n "
        else:
            targetnode.text = data.forceunicode(text) or u""
            targetnode.tail = u"\n "

    def gettarget(self):
        """Returns the translation of this unit.

        As a side effect an empty <translation> element is created when the
        unit has none; None is returned in that case.

        @return: a multistring for plural units, otherwise a (possibly empty)
            string
        """
        targetnode = self._gettargetnode()
        if targetnode is None:
            etree.SubElement(self.xmlelement, self.namespaced("translation"))
            return None
        if self.hasplural():
            numerus_nodes = targetnode.findall(self.namespaced("numerusform"))
            return multistring([node.text or u"" for node in numerus_nodes])
        return data.forceunicode(targetnode.text) or u""
    target = property(gettarget, settarget)
    rich_target = property(base.TranslationUnit._get_rich_target,
                           base.TranslationUnit._set_rich_target)

    def hasplural(self):
        """Returns whether the message is marked with numerus="yes"."""
        return self.xmlelement.get("numerus") == "yes"

    def addnote(self, text, origin=None, position="append"):
        """Add a note specifically in the appropriate "*comment" tag

        @param text: the note; byte strings are decoded as UTF-8
        @param origin: "programmer", "developer" or "source code" select
            <extracomment>; anything else selects <translatorcomment>
        @param position: "append" keeps the existing notes for this origin
            in front of the new text; any other value replaces them
        """
        if isinstance(text, str):
            text = text.decode("utf-8")
        # NOTE(review): with origin=None, getnotes()/removenotes() cover both
        # comment tags, so existing developer comments end up merged into
        # <translatorcomment>. Left unchanged; confirm whether intended.
        current_notes = self.getnotes(origin)
        self.removenotes(origin)
        if origin in ["programmer", "developer", "source code"]:
            tag = "extracomment"
        else:
            tag = "translatorcomment"
        note = etree.SubElement(self.xmlelement, self.namespaced(tag))
        if position == "append":
            note.text = "\n".join(filter(None, [current_notes, text.strip()]))
        else:
            note.text = text.strip()

    def getnotes(self, origin=None):
        """Returns the notes for the given origin joined with newlines.

        @param origin: "programmer", "developer" or "source code" for
            <extracomment>, "translator" for <translatorcomment>, None for
            both; any other value yields an empty string
        """
        #TODO: consider only responding when origin has certain values
        comments = []
        if origin in ["programmer", "developer", "source code", None]:
            notenode = self.xmlelement.find(self.namespaced("extracomment"))
            if notenode is not None and notenode.text is not None:
                comments.append(notenode.text)
        if origin in ["translator", None]:
            notenode = self.xmlelement.find(self.namespaced("translatorcomment"))
            if notenode is not None and notenode.text is not None:
                comments.append(notenode.text)
        return '\n'.join(comments)

    def removenotes(self, origin=None):
        """Remove the notes of the given origin.

        Uses the same origin values as getnotes(); None removes both the
        developer (<extracomment>) and translator (<translatorcomment>) notes.
        """
        if origin in ["programmer", "developer", "source code", None]:
            note = self.xmlelement.find(self.namespaced("extracomment"))
            if note is not None:
                self.xmlelement.remove(note)
        if origin in ["translator", None]:
            note = self.xmlelement.find(self.namespaced("translatorcomment"))
            if note is not None:
                self.xmlelement.remove(note)

    def _gettype(self):
        """Returns the type of this translation."""
        targetnode = self._gettargetnode()
        if targetnode is not None:
            return targetnode.get("type")
        return None

    def _settype(self, value=None):
        """Set the type of this translation.

        @param value: new value for the "type" attribute of <translation>; a
            false value removes the attribute
        """
        targetnode = self._gettargetnode()
        if value:
            if targetnode is None:
                # A unit that never had a target has no <translation> node
                # yet; create it (as gettarget() does) instead of failing.
                targetnode = etree.SubElement(self.xmlelement,
                                              self.namespaced("translation"))
            targetnode.set("type", value)
        elif targetnode is not None and targetnode.get("type"):
            # lxml recommends against using .attrib, but there seems to be no
            # other way
            targetnode.attrib.pop("type")

    def isreview(self):
        """States whether this unit needs to be reviewed"""
        return self._gettype() == "unfinished"

    def isfuzzy(self):
        """Returns whether the unit is "unfinished" and has a translation."""
        return self._gettype() == "unfinished" and bool(self.target)

    def markfuzzy(self, value=True):
        """Marks the translation as "unfinished", or clears its type."""
        if value:
            self._settype("unfinished")
        else:
            self._settype(None)

    def getid(self):
        """Returns the context followed by the source, or None without source."""
        if self.source is None:
            return None
        context_name = self.getcontext()
        #XXX: context_name is not supposed to be able to be None (the <name>
        # tag is compulsory in the <context> tag)
        if context_name is not None:
            return context_name + self.source
        return self.source

    def istranslatable(self):
        """Returns whether the unit has an id and is not obsolete."""
        # Found a file in the wild with no context and an empty source. This
        # served as a header, so let's classify this as not translatable.
        # http://bibletime.svn.sourceforge.net/viewvc/bibletime/trunk/bibletime/i18n/messages/bibletime_ui.ts
        # Furthermore, let's decide to handle obsolete units as untranslatable
        # like we do with PO.
        return bool(self.getid()) and not self.isobsolete()

    def getcontextname(self):
        """Returns the text of the parent's <name> element.

        @return: the name, or None when the message has no parent element or
            the parent has no <name> child
        """
        parent = self.xmlelement.getparent()
        if parent is None:
            return None
        # Use the namespaced tag like every other lookup in this class.
        context = parent.find(self.namespaced("name"))
        if context is None:
            return None
        return context.text

    def getcontext(self):
        """Returns the context name and the <comment> text, newline separated.

        Missing or empty parts are left out, so the result may be an empty
        string.
        """
        contexts = [self.getcontextname()]
        commentnode = self.xmlelement.find(self.namespaced("comment"))
        if commentnode is not None and commentnode.text is not None:
            contexts.append(commentnode.text)
        return '\n'.join(filter(None, contexts))

    def addlocation(self, location):
        """Adds a <location> element for the given location.

        @param location: "filename" or "filename:line"; the string is split
            at the first colon. Byte strings are decoded as UTF-8.
        """
        if isinstance(location, str):
            location = location.decode("utf-8")
        newlocation = etree.SubElement(self.xmlelement,
                                       self.namespaced("location"))
        try:
            filename, line = location.split(':', 1)
        except ValueError:
            # No colon: the whole string is the filename.
            filename = location
            line = None
        newlocation.set("filename", filename)
        if line is not None:
            newlocation.set("line", line)

    def getlocations(self):
        """Returns the locations of this unit as a list.

        Each entry is "filename:line", only the filename, or only the line,
        depending on which attributes the <location> element carries.
        """
        locations = []
        for location_tag in self.xmlelement.iterfind(self.namespaced("location")):
            location = location_tag.get("filename")
            line = location_tag.get("line")
            if line:
                if location:
                    location += ':' + line
                else:
                    location = line
            locations.append(location)
        return locations

    def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
        """Merges otherunit into this unit, carrying over its fuzzy state.

        The authoritative argument is accepted but not passed on to the
        parent implementation.
        """
        super(tsunit, self).merge(otherunit, overwrite, comments)
        #TODO: check if this is necessary:
        if otherunit.isfuzzy():
            self.markfuzzy()

    def isobsolete(self):
        """Returns whether the translation is of type "obsolete"."""
        return self._gettype() == "obsolete"

    def get_state_n(self):
        """Returns the state constant matching the translation's type."""
        unit_type = self._gettype()
        if unit_type == "unfinished":
            # We want to distinguish between fuzzy and untranslated, which the
            # format doesn't really do
            if self.target:
                return self.S_FUZZY
            return self.S_UNTRANSLATED
        return self.statemap[unit_type]

    def set_state_n(self, value):
        """Sets the translation's type from a state value."""
        if value not in self.statemap_r:
            value = self.get_state_id(value)

        if value == self.S_UNTRANSLATED:
            # No real way of representing that in the format, so we just
            # handle it the same as unfinished
            value = self.S_FUZZY
        self._settype(self.statemap_r[value])

class tsfile(lisa.LISAfile):
    """Class representing a Qt Linguist (.ts) file store.

    Messages are grouped in <context> elements below the <TS> root; self.body
    is switched to the context that units are currently being added to.
    """
    UnitClass = tsunit
    Name = _("Qt Linguist Translation File")
    Mimetypes = ["application/x-linguist"]
    Extensions = ["ts"]
    rootNode = "TS"
    # We will switch out .body to fit with the context we are working on
    bodyNode = "context"
    XMLskeleton = '''<!DOCTYPE TS>
<TS>
</TS>
'''
    namespace = ''

    def __init__(self, *args, **kwargs):
        # Must be set before the parent constructor runs, because initbody()
        # reads it.
        self._contextname = None
        lisa.LISAfile.__init__(self, *args, **kwargs)

    def initbody(self):
        """Initialises self.body."""
        root = self.document.getroot()
        self.namespace = root.nsmap.get(None, None)
        self.header = root
        if self._contextname:
            self.body = self._getcontextnode(self._contextname)
        else:
            self.body = root

    def getsourcelanguage(self):
        """Get the source language for this .ts file.

        The 'sourcelanguage' attribute was only added to the TS format in
        Qt v4.5. We return 'en' if there is no sourcelanguage set.

        We don't implement setsourcelanguage as users really shouldn't be
        altering the source language in .ts files, it should be set correctly
        by the extraction tools.

        @return: ISO code e.g. af, fr, pt_BR
        @rtype: String
        """
        lang = data.normalize_code(self.header.get('sourcelanguage', "en"))
        if lang == 'en-us':
            return 'en'
        return lang

    def gettargetlanguage(self):
        """Get the target language for this .ts file.

        @return: ISO code e.g. af, fr, pt_BR
        @rtype: String
        """
        return data.normalize_code(self.header.get('language'))

    def settargetlanguage(self, targetlanguage):
        """Set the target language for this .ts file to L{targetlanguage}.

        A false value leaves the file untouched.

        @param targetlanguage: ISO code e.g. af, fr, pt_BR
        @type targetlanguage: String
        """
        if targetlanguage:
            self.header.set('language', targetlanguage)

    def _createcontext(self, contextname, comment=None):
        """Creates a context node with an optional comment

        @param contextname: text for the <name> child of the new context
        @param comment: optional text for a <comment> child
        @return: the new context element, appended to the document root
        """
        context = etree.SubElement(self.document.getroot(),
                                   self.namespaced(self.bodyNode))
        name = etree.SubElement(context, self.namespaced("name"))
        name.text = contextname
        if comment:
            # SubElement is a factory function of the etree module, not a
            # method of the element, and the tag is namespaced like <name>.
            comment_node = etree.SubElement(context, self.namespaced("comment"))
            comment_node.text = comment
        return context

    def _getcontextname(self, contextnode):
        """Returns the name of the given context node.

        @return: text of the <name> child, or None if the context has none
        """
        namenode = contextnode.find(self.namespaced("name"))
        if namenode is None:
            # <name> is compulsory, but don't let one malformed context break
            # the lookup of all the others.
            return None
        return namenode.text

    def _getcontextnames(self):
        """Returns all contextnames in this TS file."""
        contextnodes = self.document.findall(self.namespaced("context"))
        return [self._getcontextname(contextnode)
                for contextnode in contextnodes]

    def _getcontextnode(self, contextname):
        """Returns the context node with the given name.

        @return: the first matching context element, or None
        """
        for contextnode in self.document.findall(self.namespaced("context")):
            if self._getcontextname(contextnode) == contextname:
                return contextnode
        return None

    def addunit(self, unit, new=True, contextname=None, createifmissing=True):
        """Adds the given unit to the last used body node (current context).

        If the contextname is specified, switch to that context (creating it
        if allowed by createifmissing).

        @return: the unit, or None if the context could not be switched to
        """
        if contextname is None:
            contextname = unit.getcontextname()

        if self._contextname != contextname:
            if not self._switchcontext(contextname, createifmissing):
                return None
        super(tsfile, self).addunit(unit, new)
        # lisa.setXMLspace(unit.xmlelement, "preserve")
        return unit

    def _switchcontext(self, contextname, createifmissing=False):
        """Switch the current context to the one named contextname, optionally
        creating it if it doesn't exist.

        @return: True if self.body now refers to the requested context, False
            if it does not exist and was not created
        """
        contextnode = self._getcontextnode(contextname)
        if contextnode is None:
            if not createifmissing:
                return False
            contextnode = self._createcontext(contextname)

        self.body = contextnode
        # Only remember the name once the switch has succeeded. Otherwise a
        # later addunit() for the same context would skip the switch and add
        # the unit to whatever body was current before.
        self._contextname = contextname
        return True

    def nplural(self):
        """Returns the number of plural forms of the target language.

        The 'language' attribute is looked up in NPLURALS, first as it is and
        then reduced to its base language code (e.g. "ru_RU" -> "ru").

        @return: number of forms; 1 when the language is unset or unknown
        """
        lang = self.header.get("language")
        if not lang:
            return 1
        if lang in NPLURALS:
            return NPLURALS[lang]
        base_lang = lang.replace("-", "_").split("_", 1)[0].lower()
        return NPLURALS.get(base_lang, 1)

    def __str__(self):
        """Converts to a string containing the file's XML.

        We have to override this to ensure that we mimic the Qt convention:
            - no XML declaration
            - plain DOCTYPE that lxml seems to ignore
        """
        # A bug in lxml means we have to output the doctype ourselves. For
        # more information, see:
        # http://codespeak.net/pipermail/lxml-dev/2008-October/004112.html
        # The problem was fixed in lxml 2.1.3
        output = etree.tostring(self.document, pretty_print=True,
                                xml_declaration=False, encoding='utf-8')
        if "<!DOCTYPE TS>" not in output[:30]:
            output = "<!DOCTYPE TS>" + output
        return output
