1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
"""Classes that hold units of .po files (pounit) or entire files (pofile).

Gettext-style .po (or .pot) files are used in translations for KDE et al. (see kbabel).
"""
23
24 from __future__ import generators
25 import copy
26 import cStringIO
27 import re
28 import urllib
29
30 from translate.lang import data
31 from translate.misc.multistring import multistring
32 from translate.misc import quote
33 from translate.misc import textwrap
34 from translate.storage import pocommon, base, poparser
35 from translate.storage.pocommon import encodingToUse
36
lsep = "\n#: "
"""Separator for #: entries"""

# Maps each escape sequence, as it is written inside a quoted .po string,
# to the single character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Inverse of po_unescape_map: raw character -> escape sequence to write.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
44
45
def escapeforpo(line):
    """Escape a line for the po format.

    Every character that is a key of C{po_escape_map} is replaced by its
    escape sequence; all other characters are copied through unchanged.

    @param line: unescaped text (callers split on newlines before calling)
    @return: the escaped text
    """
    # The keys of po_escape_map are single characters, so one lookup per
    # character gives the same result as locating every special character
    # and splicing the string around it.
    return "".join([po_escape_map.get(char, char) for char in line])
64
65
68
69
def wrapline(line):
    """Wrap text for po files.

    @param line: the text to wrap
    @return: list of wrapped lines as produced by C{textwrap.wrap} at width
        76, with whitespace kept; one leading space of every continuation
        line is moved to the end of the line before it.
    """
    wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False,
                                 expand_tabs=False, drop_whitespace=False)
    # Continuation lines should not start with the space that separated them
    # from the previous line: hand that space back to the previous line.
    for index in range(1, len(wrappedlines)):
        if wrappedlines[index].startswith(' '):
            wrappedlines[index] = wrappedlines[index][1:]
            wrappedlines[index - 1] += ' '
    return wrappedlines
84
85
def quoteforpo(text):
    """Quote the given text for a PO file, returning quoted and escaped lines.

    @param text: unescaped text, or None
    @return: list of double-quoted strings; an empty list when text is None
        or empty
    """
    polines = []
    if text is None:
        return polines
    lines = text.split("\n")
    # Multi-line text, or a single line longer than 71 characters, is written
    # in the multi-line layout.
    if len(lines) > 1 or len(lines[0]) > 71:
        # The leading empty "" entry is omitted only for a single line that
        # merely ends in a newline.
        if len(lines) != 2 or lines[1]:
            polines.append('""')
        for line in lines[:-1]:
            # NOTE: wrapping happens on the unescaped text, so escaping can
            # make an output line longer than the wrap width.
            lns = wrapline(line)
            if lns:
                polines.extend(['"' + escapeforpo(ln) + '"' for ln in lns[:-1]])
                if lns[-1]:
                    polines.append('"' + escapeforpo(lns[-1]) + '\\n"')
            else:
                # an empty source line still contributes its newline
                polines.append('"\\n"')
    if lines[-1]:
        polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
    return polines
108
109
# NOTE(review): the `def` line of this function is not present in this
# listing, so its name is not visible here; the body reads one parameter
# called `line`.
# Delegates to quote.extractwithoutquotes with '"' as both the opening and
# closing delimiter and backslash as the escape character, passing
# unescapehandler as `includeescapes`, and returns element [0] of the result.
111 """Remove quote and unescape line from po file.
112
113 @param line: a quoted line from a po file (msgid or msgstr)
114 """
115 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
116 return extracted
117
118
121
122
def is_null(lst):
    """Tell whether a list of quoted po lines holds no text.

    @param lst: list of quoted lines
    @return: True for an empty list or a list holding exactly one '""' entry
    """
    return lst == [] or (len(lst) == 1 and lst[0] == '""')
125
126
# NOTE(review): the `def` line of this function is not present in this
# listing, so its name is not visible here; the body reads one parameter
# called `string`.
# Returns the part of `string` from its first to its last double quote,
# both included.
128 left = string.find('"')
129 right = string.rfind('"')
130 if right > -1:
131 return string[left:right+1]
132 else:
# No quote anywhere: find() returned -1 as well, so this yields the last
# character of `string` (if any) followed by a double quote.
133 return string[left:] + '"'
134
135
136 -class pounit(pocommon.pounit):
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 __shallow__ = ['_store']
152
153 - def __init__(self, source=None, encoding="UTF-8"):
166
175
183
184 allcomments = property(_get_all_comments)
185
194
212
216
def setsource(self, source):
    """Sets the msgid to the given (unescaped) value.

    Also clears the cached rich source.

    @param source: an unescaped source string.
    """
    self._rich_source = None
    self.msgid, self.msgid_plural = self._set_source_vars(source)
224 source = property(getsource, setsource)
225
def _get_prev_source(self):
    """Returns the unescaped previous msgid.

    @return: whatever C{_get_source_vars} builds from prev_msgid and
        prev_msgid_plural
    """
    return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
229
def _set_prev_source(self, source):
    """Sets the previous msgid to the given (unescaped) value.

    @param source: an unescaped source string.
    """
    self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
236 prev_source = property(_get_prev_source, _set_prev_source)
237
245
def settarget(self, target):
    """Sets the msgstr to the given (unescaped) value.

    Also clears the cached rich target.

    @param target: the translation: a string, a multistring, or a list or
        dict of plural forms
    @raise ValueError: if the unit has no plural and target is a list or
        dict that does not hold exactly one element
    """
    self._rich_target = None
    if isinstance(target, str):
        # byte strings are decoded with the unit's own encoding
        target = target.decode(self._encoding)
    if self.hasplural():
        # a plural unit always ends up with a collection of forms
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) != 1:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        target = target[0]
    if isinstance(target, list):
        self.msgstr = dict([(i, quoteforpo(form)) for i, form in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(i, quoteforpo(form)) for i, form in target.items()])
    else:
        self.msgstr = quoteforpo(target)
270 target = property(gettarget, settarget)
271
# NOTE(review): the `def` line of this method is not present in this
# listing, so its name is not visible here.
# Yields at most one alternative: only when a previous source exists and the
# unit is fuzzy. The alternative is a new unit of the same type built from the
# previous source and given this unit's target.
273 """Return a list of alternate units.
274
275 Previous msgid and current msgstr is combined to form a single
276 alternative unit."""
277 prev_source = self.prev_source
278 if prev_source and self.isfuzzy():
279 unit = type(self)(prev_source)
280 unit.target = self.target
281
282
283
# An empty dict is attached as `xmlelement`; the reason is not visible in
# this listing (the explanatory comment lines above are elided).
284 unit.xmlelement = dict()
285 return [unit]
286 return []
287
# NOTE(review): the `def` line of this method is not present in this
# listing; the body reads a parameter called `origin`.
# The slices [2:] and [3:] drop the first 2 / 3 characters of each stored
# comment line (the lengths of the "# " and "#. " markers that addnote
# writes).
289 """Return comments based on origin value (programmer, developer, source code and translator)"""
# NOTE(review): `== None` compares by equality; `is None` is the usual test.
290 if origin == None:
291 comments = u"".join([comment[2:] for comment in self.othercomments])
292 comments += u"".join([comment[3:] for comment in self.automaticcomments])
293 elif origin == "translator":
294 comments = u"".join([comment[2:] for comment in self.othercomments])
295 elif origin in ["programmer", "developer", "source code"]:
296 comments = u"".join([comment[3:] for comment in self.automaticcomments])
297 else:
298 raise ValueError("Comment type not valid")
299
# Drops the final character of the joined text (presumably the trailing
# newline of the last comment line).
300 return comments[:-1]
301
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note; it is split on newlines and every line becomes one
        comment line. Empty or whitespace-only text is ignored.
    @param origin: "programmer", "developer" or "source code" store the note
        in the automatic comments ("#. "); any other value stores it in the
        other (translator) comments ("# ").
    @param position: "append" or "prepend" relative to the existing comments
        of that kind; with any other value the new lines replace them.
    """
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "
    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        newcomments = commentlist + newcomments
    elif position == "prepend":
        newcomments = newcomments + commentlist

    if autocomments:
        self.automaticcomments = newcomments
    else:
        self.othercomments = newcomments
326
# NOTE(review): the `def` line of this method is not present in this listing.
# Only `othercomments` is reset; the other comment lists are left as they are.
328 """Remove all the translator's notes (other comments)"""
329 self.othercomments = []
330
332
# NOTE(review): the `def` line of this method is not present in this listing;
# the body reads a parameter called `memo`, which matches the signature of
# the copy protocol hook `__deepcopy__(self, memo)` - confirm.
# Builds a fresh instance with no constructor arguments, deep-copies every
# instance attribute except those named in __shallow__, and shares the
# __shallow__ attributes by reference.
333 new_unit = self.__class__()
334
335
336 shallow = set(self.__shallow__)
337
338 for key, value in self.__dict__.iteritems():
339 if key not in shallow:
# NOTE(review): `memo` is not passed on to this nested deepcopy call.
340 setattr(new_unit, key, copy.deepcopy(value))
341
342 for key in set(shallow):
343 setattr(new_unit, key, getattr(self, key))
344
345
# NOTE(review): this records the original object (not new_unit) under
# id(self), and only after the attributes were copied - check against the
# `copy` module's description of the memo dictionary.
346 memo[id(self)] = self
347
348 return new_unit
349
# NOTE(review): the `def` line of this method is not present in this listing.
# Returns a deep copy of the unit made with the standard `copy` module.
351 return copy.deepcopy(self)
352
358
365
# NOTE(review): indentation is lost in this listing, so the nesting of the
# statements below (in particular which calls are guarded by
# `if not authoritative`) cannot be read off this text; the comments only
# state what holds regardless of nesting.
366 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
367 """Merges the otherpo (with the same msgid) into this one.
368
369 Overwrite non-blank self.msgstr only if overwrite is True
370 merge comments only if comments is True
371 """
372
# Helper: extends list1 in place with entries taken from list2.
373 def mergelists(list1, list2, split=False):
374
# If either list holds a unicode item, byte strings in both lists are
# decoded as UTF-8 in place.
375 if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
376 for position, item in enumerate(list1):
377 if isinstance(item, str):
378 list1[position] = item.decode("utf-8")
379 for position, item in enumerate(list2):
380 if isinstance(item, str):
381 list2[position] = item.decode("utf-8")
382
383
# The line ending is taken from the first entry of list1.
# NOTE(review): the third candidate is "\n\r"; "\r\n" may have been
# intended - confirm.
384 lineend = ""
385 if list1 and list1[0]:
386 for candidate in ["\n", "\r", "\n\r"]:
387 if list1[0].endswith(candidate):
388 lineend = candidate
389 if not lineend:
390 lineend = ""
391 else:
392 lineend = "\n"
393
394
# Split mode works on whitespace-separated tokens: the first token of each
# entry is taken as the prefix, the remaining tokens are the values; only
# values of list2 that are missing from list1 are appended.
395 if split:
396 splitlist1 = []
397 splitlist2 = []
398 prefix = "#"
399 for item in list1:
400 splitlist1.extend(item.split()[1:])
401 prefix = item.split()[0]
402 for item in list2:
403 splitlist2.extend(item.split()[1:])
404 prefix = item.split()[0]
405 list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
406 else:
407
408 if list1 != list2:
409 for item in list2:
410 if lineend:
411 item = item.rstrip() + lineend
412
# Entries shorter than 5 characters are appended even when list1 already
# contains them.
413 if item not in list1 or len(item) < 5:
414 list1.append(item)
# Anything that is not a pounit is handed to the superclass merge.
415 if not isinstance(otherpo, pounit):
416 super(pounit, self).merge(otherpo, overwrite, comments)
417 return
418 if comments:
419 mergelists(self.othercomments, otherpo.othercomments)
420 mergelists(self.typecomments, otherpo.typecomments)
421 if not authoritative:
422
423
424 mergelists(self.automaticcomments, otherpo.automaticcomments)
425 mergelists(self.msgidcomments, otherpo.msgidcomments)
426 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
427 if not self.istranslated() or overwrite:
428
# A "_: <comment>\n" prefix found in the other unit's target is removed from
# otherpo.target itself before it is taken over.
429 if self._extract_msgidcomments(otherpo.target):
430 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
431 self.target = otherpo.target
432 if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
433 self.markfuzzy()
434 else:
435 self.markfuzzy(otherpo.isfuzzy())
436 elif not otherpo.istranslated():
437 if self.source != otherpo.source:
438 self.markfuzzy()
439 else:
440 if self.target != otherpo.target:
441 self.markfuzzy()
442
444
445
def isheader(self):
    """Tell whether this unit looks like the header entry.

    @return: True when the msgid and msgctxt are empty, the msgstr is not
        empty and there are no msgid comments
    """
    return (is_null(self.msgid)
            and not is_null(self.msgstr)
            and self.msgidcomments == []
            and is_null(self.msgctxt))
450
# NOTE(review): the `def` line of this method is not present in this listing.
# A header, or a unit with msgid comments, never counts as blank; otherwise
# the unit is blank when msgid length, msgstr length and msgctxt are all
# empty.
452 if self.isheader() or len(self.msgidcomments):
453 return False
454 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
455 return True
456 return False
457
458
459
460
465
473
487
490
498
501
507
510
def makeobsolete(self):
    """Makes this unit obsolete.

    After the superclass handling, the unit is flagged obsolete and its
    source comments and automatic comments are emptied.
    """
    super(pounit, self).makeobsolete()
    self.obsolete = True
    self.sourcecomments = []
    self.automaticcomments = []
517
def resurrect(self):
    """Makes an obsolete unit normal.

    After the superclass handling, the obsolete flag is cleared.
    """
    super(pounit, self).resurrect()
    self.obsolete = False
522
def hasplural(self):
    """Returns whether this pounit contains plural strings.

    @return: True when msgid_plural holds at least one line
    """
    return len(self.msgid_plural) > 0
526
529
# NOTE(review): the `def` line of this method is not present in this listing.
# Call sites pass (partname, partlines) and optionally partcomments, so the
# third parameter must have a default - its value is not visible here.
# Builds the text of one po part: the keyword `partname`, then the quoted
# lines, one per output line.
# A dict of parts is written as one "name[key]" section per key, in sorted
# key order.
531 if isinstance(partlines, dict):
532 partkeys = partlines.keys()
533 partkeys.sort()
534 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
535 partstr = partname + " "
# Index of the first entry of partlines that the final loop still has to
# write.
536 partstartline = 0
537 if len(partlines) > 0 and len(partcomments) == 0:
538 partstr += partlines[0]
539 partstartline = 1
540 elif len(partcomments) > 0:
541 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
542
543 partstr += partlines[0] + '\n'
544
545 if len(partlines) > 1:
546 partstartline += 1
547 else:
548
549 partstr += '""\n'
550
# Several comment entries are merged into a single "_:" comment: each one is
# unquoted, stripped of a leading "_:" and of a trailing backslash-n escape,
# then the pieces are joined and quoted again.
551 if len(partcomments) > 1:
552 combinedcomment = []
553 for comment in partcomments:
554 comment = unquotefrompo([comment])
555 if comment.startswith("_:"):
556 comment = comment[len("_:"):]
557 if comment.endswith("\\n"):
558 comment = comment[:-len("\\n")]
559
560 combinedcomment.append(comment)
561 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
562
563 partstr += "\n".join(partcomments)
564 partstr = quote.rstripeol(partstr)
565 else:
566 partstr += '""'
567 partstr += '\n'
568
569 for partline in partlines[partstartline:]:
570 partstr += partline + '\n'
571 return partstr
572
def _encodeifneccessary(self, output):
    """Encodes unicode strings and returns other strings unchanged.

    @param output: the text to encode
    @return: output encoded with the unit's C{_encoding} (UTF-8 when the
        attribute is missing) as mapped by C{encodingToUse}, or output itself
        when it is not unicode
    """
    if not isinstance(output, unicode):
        return output
    encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
    return output.encode(encoding)
579
# NOTE(review): the `def` line of this method is not present in this listing.
# Renders the unit with _getoutput() and passes the result through
# _encodeifneccessary().
581 """convert to a string. double check that unicode is handled somehow here"""
582 output = self._getoutput()
583 return self._encodeifneccessary(output)
584
def _getoutput(self):
    """Return this po element as a string.

    @return: the unit in .po syntax, joined with a unicode separator. An
        obsolete unit gets "#~" markers on its message parts; a unit with an
        empty msgid that is neither a header nor has context or source
        comments yields only its other (translator) comments.
    """

    def add_prev_msgid_lines(lines, prefix, header, var):
        # The first quoted line carries the keyword; the rest only the prefix.
        if len(var) > 0:
            lines.append("%s %s %s\n" % (prefix, header, var[0]))
            lines.extend(["%s %s\n" % (prefix, line) for line in var[1:]])

    def add_prev_msgid_info(lines, prefix):
        add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
        add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
        add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

    lines = list(self.othercomments)
    if self.isobsolete():
        lines.extend(self.typecomments)
        obsoletelines = []
        add_prev_msgid_info(obsoletelines, prefix="#~|")
        if self.msgctxt:
            obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.msgctxt))
        obsoletelines.append(self._getmsgpartstr("#~ msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.msgstr))
        # Continuation lines of a multi-line part begin with a double quote;
        # they need the "#~ " marker as well.
        lines.extend([part.replace('\n"', '\n#~ "') for part in obsoletelines])
        return u"".join(lines)

    # Without a msgid nothing but the comments collected so far is written,
    # unless the unit is the header or has a context or source comments.
    if is_null(self.msgid):
        if not (self.isheader() or self.getcontext() or self.sourcecomments):
            return u"".join(lines)
    lines.extend(self.automaticcomments)
    lines.extend(self.sourcecomments)
    lines.extend(self.typecomments)
    add_prev_msgid_info(lines, prefix="#|")
    if self.msgctxt:
        lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
    lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
    return u"".join(lines)
632
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit.

    @rtype: List
    @return: the locations of all source comments, with the leading "#: "
        removed and each entry passed through C{urllib.unquote_plus}
    """
    locations = []
    for sourcecomment in self.sourcecomments:
        # drop the line ending and the three-character "#: " marker, then
        # take the whitespace-separated entries
        entries = quote.rstripeol(sourcecomment)[3:].split()
        locations.extend([urllib.unquote_plus(entry) for entry in entries])
    return locations
646
# NOTE(review): the `def` line of this method is not present in this listing;
# the body reads a parameter called `location`.
648 """Add a location to sourcecomments in the PO unit
649
650 @param location: Text location e.g. 'file.c:23' does not include #:
651 @type location: String
652
653 """
# A location containing a space is quoted with quote_plus before it is stored
# (locations are split on whitespace when source comments are read back).
654 if location.find(" ") != -1:
655 location = urllib.quote_plus(location)
656 self.sourcecomments.append("#: %s\n" % location)
657
668
674
675 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
676
def getcontext(self):
    """Get the message context.

    @return: the unquoted msgctxt followed by the text extracted from the
        msgid comments
    """
    return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
680
def setcontext(self, context):
    """Set the message context.

    @param context: unescaped context text; it is forced to unicode and
        stored in msgctxt as quoted po lines
    """
    self.msgctxt = quoteforpo(data.forceunicode(context))
684
def getid(self):
    """Returns a unique identifier for this unit.

    @return: the source text; prefixed with "_: <context>" and a newline when
        the unit has msgid comments, otherwise prefixed with the context and
        an EOT character (code 4) when a context exists
    """
    context = self.getcontext()
    unit_id = self.source
    if self.msgidcomments:
        unit_id = u"_: %s\n%s" % (context, unit_id)
    elif context:
        unit_id = u"%s\04%s" % (context, unit_id)
    return unit_id
699
700
701 -class pofile(pocommon.pofile):
702 """A .po file containing various units"""
703 UnitClass = pounit
704
# NOTE(review): the `def` line of this method is not present in this listing;
# the body reads a parameter called `input`.
706 """Parses the given file or file source string."""
# The condition is constant, so the body below always runs.
707 if True:
708
# Take the file name from the input object when it has one; otherwise make
# sure self.filename exists.
709 if hasattr(input, 'name'):
710 self.filename = input.name
711 elif not getattr(self, 'filename', ''):
712 self.filename = ''
# A byte string is wrapped in a file-like object before parsing.
713 if isinstance(input, str):
714 input = cStringIO.StringIO(input)
715
# Any previously held units are discarded before the parser is run.
716 self.units = []
717 poparser.parse_units(poparser.ParseState(input, pounit), self)
718
719
720
# NOTE(review): the `def` line of this method is not present in this listing;
# the body reads a name `duplicatestyle` and compares it with "merge" and
# "msgctxt".
722 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
723
724
# id_dict maps a unit id to the first unit seen with that id; uniqueunits
# collects the units that are kept.
725 id_dict = {}
726 uniqueunits = []
727
728
729 markedpos = []
730
# Adds the unit's locations as a "_: ..." msgid comment and records the unit
# in markedpos.
731 def addcomment(thepo):
732 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
733 markedpos.append(thepo)
734 for thepo in self.units:
735 id = thepo.getid()
736 if thepo.isheader() and not thepo.getlocations():
737
738 uniqueunits.append(thepo)
739 elif id in id_dict:
740 if duplicatestyle == "merge":
741 if id:
742 id_dict[id].merge(thepo)
743 else:
744 addcomment(thepo)
745 uniqueunits.append(thepo)
746 elif duplicatestyle == "msgctxt":
747 origpo = id_dict[id]
748 if origpo not in markedpos:
749 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
# NOTE(review): the test above is on origpo but the unit recorded here is
# thepo - confirm that this is intended.
750 markedpos.append(thepo)
751 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
752 uniqueunits.append(thepo)
753 else:
754 if not id:
755 if duplicatestyle == "merge":
756 addcomment(thepo)
757 else:
758 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
759 id_dict[id] = thepo
760 uniqueunits.append(thepo)
761 self.units = uniqueunits
762
# NOTE(review): the `def` line of this method is not present in this listing.
764 """Convert to a string. double check that unicode is handled somehow here"""
765 output = self._getoutput()
766 if isinstance(output, unicode):
767 try:
768 return output.encode(getattr(self, "_encoding", "UTF-8"))
769 except UnicodeEncodeError, e:
# Fallback when the text cannot be encoded with the current encoding: the
# header and every unit are switched to UTF-8 and the output is generated
# again.
770 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
771 self._encoding = "UTF-8"
772 for unit in self.units:
773 unit._encoding = "UTF-8"
774 return self._getoutput().encode("UTF-8")
775
776 return output
777
def _getoutput(self):
    """Convert the units back to lines.

    @return: the output of every unit, each followed by a newline, with
        trailing whitespace stripped and - unless the result is empty -
        exactly one final newline
    """
    text = u"".join([unit._getoutput() + u"\n" for unit in self.units]).rstrip()
    # After stripping, put back the single newline the file should end in.
    if text:
        text += u"\n"
    return text
789
801
# NOTE(review): the `def` line of this method is not present in this listing;
# the body reads a parameter called `lines`.
803 """decode any non-unicode strings in lines with self._encoding"""
804 newlines = []
805 for line in lines:
# Lines are left undecoded when no encoding is set or when the encoding name
# is the literal "charset" (compared case-insensitively).
806 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
807 try:
808 line = line.decode(self._encoding)
809 except UnicodeError, e:
# Re-raised with the encoding and the offending line in the message.
810 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
811 newlines.append(line)
812 return newlines
813
# NOTE(review): the `def` line of this method is not present in this listing.
# Generator over self.units that skips header units and obsolete units.
815 for unit in self.units:
816 if not (unit.isheader() or unit.isobsolete()):
817 yield unit
818