1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Module for handling XLIFF files for translation.
22
23 The official recommendation is to use the extension .xlf for XLIFF files.
24 """
25
26 from lxml import etree
27
28 from translate.misc.multistring import multistring
29 from translate.storage import base, lisa
30 from translate.storage.lisa import getXMLspace
31 from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml
32 from translate.storage.workflow import StateEnum as state
33
34
35
36 ID_SEPARATOR = u"\04"
37
38
39
40
41
42 ID_SEPARATOR_SAFE = u"__%04__"
43
44
46 """A single term in the xliff file."""
47
48 rootNode = "trans-unit"
49 languageNode = "source"
50 textNode = ""
51 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
52
53 _default_xml_space = "default"
54
55
56
57 S_UNTRANSLATED = state.EMPTY
58 S_NEEDS_TRANSLATION = state.NEEDS_WORK
59 S_NEEDS_REVIEW = state.NEEDS_REVIEW
60 S_TRANSLATED = state.UNREVIEWED
61 S_SIGNED_OFF = state.FINAL
62 S_FINAL = state.MAX
63
64 statemap = {
65 "new": S_UNTRANSLATED + 1,
66 "needs-translation": S_NEEDS_TRANSLATION,
67 "needs-adaptation": S_NEEDS_TRANSLATION + 1,
68 "needs-l10n": S_NEEDS_TRANSLATION + 2,
69 "needs-review-translation": S_NEEDS_REVIEW,
70 "needs-review-adaptation": S_NEEDS_REVIEW + 1,
71 "needs-review-l10n": S_NEEDS_REVIEW + 2,
72 "translated": S_TRANSLATED,
73 "signed-off": S_SIGNED_OFF,
74 "final": S_FINAL,
75 }
76
77 statemap_r = dict((i[1], i[0]) for i in statemap.iteritems())
78
79 STATE = {
80 S_UNTRANSLATED: (state.EMPTY, state.NEEDS_WORK),
81 S_NEEDS_TRANSLATION: (state.NEEDS_WORK, state.NEEDS_REVIEW),
82 S_NEEDS_REVIEW: (state.NEEDS_REVIEW, state.UNREVIEWED),
83 S_TRANSLATED: (state.UNREVIEWED, state.FINAL),
84 S_SIGNED_OFF: (state.FINAL, state.MAX),
85 }
86
def __init__(self, source, empty=False, **kwargs):
    """Construct the unit and flag its XML element as xml:space="preserve".

    All arguments are handed to the parent constructor unchanged.  When
    ``empty`` is true the xml:space attribute is left alone.
    """
    super(xliffunit, self).__init__(source, empty, **kwargs)
    if not empty:
        lisa.setXMLspace(self.xmlelement, "preserve")
93
95 """Returns an xml Element setup with given parameters."""
96
97
98
99
100 assert purpose
101 langset = etree.Element(self.namespaced(purpose))
102
103
104
105 langset.text = text
106 return langset
107
123
125 sourcelanguageNode = self.get_source_dom()
126 if sourcelanguageNode is None:
127 sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source")
128 self.set_source_dom(sourcelanguageNode)
129
130
131 for i in range(len(sourcelanguageNode)):
132 del sourcelanguageNode[0]
133 sourcelanguageNode.text = None
134
135 strelem_to_xml(sourcelanguageNode, value[0])
136
143 rich_source = property(get_rich_source, set_rich_source)
144
162
163
170 rich_target = property(get_rich_target, set_rich_target)
171
def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
    """Append an alt-trans element, with its target child, to this unit.

    @type txt: String
    @param txt: Alternative translation of the source text.
    @param origin: Optional value for the "origin" attribute.
    @param lang: Optional language to set on the alt-trans element.
    @param sourcetxt: Optional text for a <source> child of the alt-trans.
    @param matchquality: Optional value for the "match-quality" attribute.
    """
    def as_unicode(value):
        # Byte strings are decoded as UTF-8; anything else passes through.
        if isinstance(value, str):
            return value.decode("utf-8")
        return value

    # Decode the target text first so a decoding failure leaves the unit
    # without a half-built alt-trans element.
    txt = as_unicode(txt)
    alt_node = etree.SubElement(self.xmlelement, self.namespaced("alt-trans"))
    lisa.setXMLspace(alt_node, "preserve")

    if sourcetxt:
        sourcetxt = as_unicode(sourcetxt)
        source_node = etree.SubElement(alt_node, self.namespaced("source"))
        source_node.text = sourcetxt

    target_node = etree.SubElement(alt_node, self.namespaced("target"))
    target_node.text = txt

    # Optional attributes are only written when a truthy value was given.
    for attribute, value in (("match-quality", matchquality), ("origin", origin)):
        if value:
            alt_node.set(attribute, value)
    if lang:
        lisa.setXMLlang(alt_node, lang)
198
225
227 """Removes the supplied alternative from the list of alt-trans tags"""
228 self.xmlelement.remove(alternative.xmlelement)
229
def addnote(self, text, origin=None, position="append"):
    """Store ``text`` in a new "note" element on this unit.

    @param text: The note text; it is stripped, and nothing is added when
        it is empty or None.
    @param origin: Optional value for the note's "from" attribute.
    @param position: Anything other than "append" first removes the
        existing notes matching ``origin``.
    """
    if position != "append":
        self.removenotes(origin=origin)

    cleaned = text.strip() if text else text
    if not cleaned:
        return

    # Byte strings are decoded as UTF-8 before being stored.
    if isinstance(cleaned, str):
        cleaned = cleaned.decode("utf-8")

    note_node = etree.SubElement(self.xmlelement, self.namespaced("note"))
    note_node.text = cleaned
    if origin:
        note_node.set("from", origin)
245
247 """Private method that returns the text from notes matching 'origin' or all notes."""
248 notenodes = self.xmlelement.iterdescendants(self.namespaced("note"))
249
250
251
252 initial_list = [lisa.getText(note, getXMLspace(self.xmlelement, self._default_xml_space)) for note in notenodes if self.correctorigin(note, origin)]
253
254
255 dictset = {}
256 notelist = [dictset.setdefault(note, note) for note in initial_list if note not in dictset]
257
258 return notelist
259
262
264 """Remove all the translator notes."""
265 notes = self.xmlelement.iterdescendants(self.namespaced("note"))
266 for note in notes:
267 if self.correctorigin(note, origin=origin):
268 self.xmlelement.remove(note)
269
def adderror(self, errorname, errortext):
    """Record an error on this unit as a note with origin "pofilter".

    The note text is ``errorname``, followed by ": " and ``errortext``
    when ``errortext`` is non-empty.
    """
    if errortext:
        note_text = errorname + ': ' + errortext
    else:
        note_text = errorname
    self.addnote(note_text, origin="pofilter")
277
279 """Get all error messages."""
280
281 notelist = self.getnotelist(origin="pofilter")
282 errordict = {}
283 for note in notelist:
284 errorname, errortext = note.split(': ')
285 errordict[errorname] = errortext
286 return errordict
287
309
326
328 """States whether this unit is approved."""
329 return self.xmlelement.get("approved") == "yes"
330
332 """Mark this unit as approved."""
333 if value:
334 self.xmlelement.set("approved", "yes")
335 elif self.isapproved():
336 self.xmlelement.set("approved", "no")
337
341
351
358
369
370 - def settarget(self, text, lang='xx', append=False):
375
376
377
378
379
380
381
382
384 value = self.xmlelement.get("translate")
385 if value and value.lower() == 'no':
386 return False
387 return True
388
393
397
410
413
415 id_attr = unicode(self.xmlelement.get("id") or u"")
416
417
418
419 if id_attr and id_attr != self.source:
420 return [id_attr]
421 return []
422
def createcontextgroup(self, name, contexts=None, purpose=None):
    """Attach a context-group called ``name`` to this unit's element.

    @param name: Value of the group's "name" attribute.
    @param contexts: Non-empty list of (type, text) tuples, one per
        context element to create inside the group.
    @param purpose: Optional value for the group's "purpose" attribute.
    """
    assert contexts
    group_node = etree.Element(self.namespaced("context-group"))

    # Inside a <group> element the context-group goes first; on any other
    # element it is appended at the end.
    # NOTE(review): presumably dictated by XLIFF child ordering - confirm.
    parent = self.xmlelement
    if parent.tag == self.namespaced("group"):
        parent.insert(0, group_node)
    else:
        parent.append(group_node)

    group_node.set("name", name)
    if purpose:
        group_node.set("purpose", purpose)

    for context_type, context_text in contexts:
        # Byte strings are decoded as UTF-8 before being stored.
        if isinstance(context_text, str):
            context_text = context_text.decode("utf-8")
        context_node = etree.SubElement(group_node, self.namespaced("context"))
        context_node.text = context_text
        context_node.set("context-type", context_type)
444
def getcontextgroups(self, name):
    """Collect the contexts of every context-group called ``name``.

    Returns a list with one entry per matching group; each entry is a
    list of (context-type, text) tuples for that group's contexts.
    """
    collected = []
    for group_node in self.xmlelement.iterdescendants(self.namespaced("context-group")):
        if group_node.get("name") != name:
            continue
        collected.append([
            (context_node.get("context-type"),
             lisa.getText(context_node,
                          getXMLspace(self.xmlelement, self._default_xml_space)))
            for context_node in group_node.iterdescendants(self.namespaced("context"))
        ])
    return collected
458
460 """returns the restype attribute in the trans-unit tag"""
461 return self.xmlelement.get("restype")
462
463 - def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
474
476 """Check against node tag's origin (e.g note or alt-trans)"""
477 if origin == None:
478 return True
479 elif origin in node.get("from", ""):
480 return True
481 elif origin in node.get("origin", ""):
482 return True
483 else:
484 return False
485
487 """Override L{TranslationUnit.multistring_to_rich} which is used by the
488 C{rich_source} and C{rich_target} properties."""
489 strings = mstr
490 if isinstance(mstr, multistring):
491 strings = mstr.strings
492 elif isinstance(mstr, basestring):
493 strings = [mstr]
494
495 return [xml_to_strelem(s) for s in strings]
496 multistring_to_rich = classmethod(multistring_to_rich)
497
499 """Override L{TranslationUnit.rich_to_multistring} which is used by the
500 C{rich_source} and C{rich_target} properties."""
501 return multistring([unicode(elem) for elem in elem_list])
502 rich_to_multistring = classmethod(rich_to_multistring)
503
504
506 """Class representing a XLIFF file store."""
507 UnitClass = xliffunit
508 Name = _("XLIFF Translation File")
509 Mimetypes = ["application/x-xliff", "application/x-xliff+xml"]
510 Extensions = ["xlf", "xliff", "sdlxliff"]
511 rootNode = "xliff"
512 bodyNode = "body"
513 XMLskeleton = '''<?xml version="1.0" ?>
514 <xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
515 <file original='NoName' source-language='en' datatype='plaintext'>
516 <body>
517 </body>
518 </file>
519 </xliff>'''
520 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
521 suggestions_in_format = True
522 """xliff units have alttrans tags which can be used to store suggestions"""
523
525 self._filename = None
526 lisa.LISAfile.__init__(self, *args, **kwargs)
527 self._messagenum = 0
528
def initbody(self):
    """Read the document's default namespace and select the body node.

    ``self.namespace`` is taken from the root element's namespace map.
    The file node is the one named ``self._filename`` (created if
    missing) when a filename is set, otherwise the first <file> child of
    the root.  ``self.body`` becomes that file's body node, created if
    missing.

    Raises StopIteration when no filename is set and the root has no
    <file> child.
    """
    root = self.document.getroot()
    # Must be assigned before namespaced() is called below.
    self.namespace = root.nsmap.get(None, None)

    if self._filename:
        filenode = self.getfilenode(self._filename, createifmissing=True)
    else:
        # The next() builtin works on any iterator, unlike the
        # Python-2-only .next() method.
        filenode = next(root.iterchildren(self.namespaced('file')))
    self.body = self.getbodynode(filenode, createifmissing=True)
537
539 """Initialise the file header."""
540 pass
541
542 - def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
567
569 """returns the name of the given file"""
570 return filenode.get("original")
571
573 """set the name of the given file"""
574 return filenode.set("original", filename)
575
577 """returns all filenames in this XLIFF file"""
578 filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
579 filenames = [self.getfilename(filenode) for filenode in filenodes]
580 filenames = filter(None, filenames)
581 if len(filenames) == 1 and filenames[0] == '':
582 filenames = []
583 return filenames
584
def getfilenode(self, filename, createifmissing=False):
    """Look up the <file> node whose name equals ``filename``.

    Returns the first matching child of the root.  Without a match, the
    result of ``createfilenode(filename)`` is returned when
    ``createifmissing`` is true, otherwise None.
    """
    root = self.document.getroot()
    for candidate in root.iterchildren(self.namespaced("file")):
        if self.getfilename(candidate) == filename:
            return candidate
    if not createifmissing:
        return None
    return self.createfilenode(filename)
595
def getids(self, filename=None):
    """Return unit ids, optionally restricted to one file.

    Without ``filename`` the parent implementation is used.  Otherwise
    ``self.id_index`` is rebuilt to map each id, with the
    ``filename + ID_SEPARATOR`` prefix removed, to its unit, for the units
    whose id starts with that prefix; the keys of that index are returned.
    """
    if not filename:
        return super(xlifffile, self).getids()

    prefix = filename + ID_SEPARATOR
    prefix_length = len(prefix)
    index = {}
    self.id_index = index
    for unit in self.units:
        if unit.getid().startswith(prefix):
            index[unit.getid()[prefix_length:]] = unit
    return index.keys()
606
608 if not language:
609 return
610 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
611 filenode.set("source-language", language)
612
614 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
615 return filenode.get("source-language")
616 sourcelanguage = property(getsourcelanguage, setsourcelanguage)
617
619 if not language:
620 return
621 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
622 filenode.set("target-language", language)
623
625 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
626 return filenode.get("target-language")
627 targetlanguage = property(gettargetlanguage, settargetlanguage)
628
630 """Returns the datatype of the stored file. If no filename is given,
631 the datatype of the first file is given."""
632 if filename:
633 node = self.getfilenode(filename)
634 if not node is None:
635 return node.get("datatype")
636 else:
637 filenames = self.getfilenames()
638 if len(filenames) > 0 and filenames[0] != "NoName":
639 return self.getdatatype(filenames[0])
640 return ""
641
643 """Returns the date attribute for the file. If no filename is given,
644 the date of the first file is given. If the date attribute is not
645 specified, None is returned."""
646 if filename:
647 node = self.getfilenode(filename)
648 if not node is None:
649 return node.get("date")
650 else:
651 filenames = self.getfilenames()
652 if len(filenames) > 0 and filenames[0] != "NoName":
653 return self.getdate(filenames[0])
654 return None
655
657 """We want to remove the default file-tag as soon as possible if we
658 know if still present and empty."""
659 filenodes = list(self.document.getroot().iterchildren(self.namespaced("file")))
660 if len(filenodes) > 1:
661 for filenode in filenodes:
662 if filenode.get("original") == "NoName" and \
663 not list(filenode.iterdescendants(self.namespaced(self.UnitClass.rootNode))):
664 self.document.getroot().remove(filenode)
665 break
666
668 """finds the header node for the given filenode"""
669
670 headernode = filenode.iterchildren(self.namespaced("header"))
671 try:
672 return headernode.next()
673 except StopIteration:
674 pass
675 if not createifmissing:
676 return None
677 headernode = etree.SubElement(filenode, self.namespaced("header"))
678 return headernode
679
def getbodynode(self, filenode, createifmissing=False):
    """Find the body node of the given file node.

    @param filenode: The <file> element to search.
    @param createifmissing: When true, a missing body is created as a new
        child of ``filenode``.
    @return: The first <body> child of ``filenode``, the newly created
        one, or None when there is none and ``createifmissing`` is false.
    """
    # next() with a default replaces the Python-2-only .next() call and
    # the try/except StopIteration/pass around it.
    bodynode = next(filenode.iterchildren(self.namespaced("body")), None)
    if bodynode is None and createifmissing:
        bodynode = etree.SubElement(filenode, self.namespaced("body"))
    return bodynode
691
def addsourceunit(self, source, filename="NoName", createifmissing=False):
    """Add a unit for ``source`` to the body of the file ``filename``.

    When ``filename`` differs from the currently selected file,
    switchfile() is called first (creating the required nodes if
    ``createifmissing`` is true).  The new unit's id is the incremented
    message counter.

    Returns the new unit, or None when the file switch failed.
    """
    must_switch = self._filename != filename
    if must_switch and not self.switchfile(filename, createifmissing):
        return None

    new_unit = super(xlifffile, self).addsourceunit(source)
    self._messagenum += 1
    new_unit.setid("%d" % self._messagenum)
    return new_unit
703
def switchfile(self, filename, createifmissing=False):
    """Make the file node called ``filename`` the current one.

    Looks up the file node and its body.  If the file node is missing and
    ``createifmissing`` is true, it is created and appended to the root;
    the same flag is passed on when fetching the body.

    ``self._filename``, ``self.body`` and ``self._messagenum`` are only
    updated once the switch has succeeded.  A failed switch therefore
    leaves the previous selection intact, instead of recording the new
    filename and letting later addsourceunit()/creategroup() calls for it
    skip the switch and write into the old body.

    @return: True on success, False when the file node or its body is
        missing and may not be created.
    """
    filenode = self.getfilenode(filename)
    if filenode is None:
        if not createifmissing:
            return False
        # NOTE(review): assumes createfilenode() does not read
        # self._filename - confirm against its implementation.
        filenode = self.createfilenode(filename)
        self.document.getroot().append(filenode)

    body = self.getbodynode(filenode, createifmissing=createifmissing)
    if body is None:
        return False

    self._filename = filename
    self.body = body
    # Numbering continues after the trans-units already in this body.
    self._messagenum = len(list(body.iterdescendants(self.namespaced("trans-unit"))))
    return True
724
def creategroup(self, filename="NoName", createifmissing=False, restype=None):
    """Append a group element to the body of the file ``filename``.

    When ``filename`` differs from the currently selected file,
    switchfile() is called first.  ``restype``, if given, is stored as
    the group's "restype" attribute.

    Returns the new group element, or None when the file switch failed.
    """
    must_switch = self._filename != filename
    if must_switch and not self.switchfile(filename, createifmissing):
        return None

    group_node = etree.SubElement(self.body, self.namespaced("group"))
    if restype:
        group_node.set("restype", restype)
    return group_node
734
738
750 parsestring = classmethod(parsestring)
751