1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .po files (pounit) or entire files (pofile).
23
24 Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
25 many other projects.
26
27 This uses libgettextpo from the gettext package. Any version before 0.17 will
28 at least cause some subtle bugs or may not work at all. Developers might want
29 to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
30 package for the public API of the library.
31 """
32
33 from ctypes import c_size_t, c_int, c_uint, c_char_p, c_long, CFUNCTYPE, POINTER
34 from ctypes import Structure, cdll
35 import ctypes.util
36 import os
37 import re
38 import sys
39 import tempfile
40 import urllib
41
42 from translate.lang import data
43 from translate.misc.multistring import multistring
44 from translate.storage import base, pocommon
45 from translate.storage import pypo
46 from translate.storage.pocommon import encodingToUse
47
48 lsep = " "
49 """Seperator for #: entries"""
50
51 STRING = c_char_p
52
53
54
57
58
59 xerror_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
60 xerror2_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
61
62
63
67
68
70 _fields_ = [
71 ('error', CFUNCTYPE(None, c_int, c_int, STRING)),
72 ('error_at_line', CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING)),
73 ('multiline_warning', CFUNCTYPE(None, STRING, STRING)),
74 ('multiline_error', CFUNCTYPE(None, STRING, STRING)),
75 ]
76
77
78
79 -def xerror_cb(severity, message, filename, lineno, column, multilint_p, message_text):
80 print >> sys.stderr, "xerror_cb", severity, message, filename, lineno, column, multilint_p, message_text
81 if severity >= 1:
82 raise ValueError(message_text)
83
84
85 -def xerror2_cb(severity, message1, filename1, lineno1, column1, multiline_p1, message_text1, message2, filename2, lineno2, column2, multiline_p2, message_text2):
86 print >> sys.stderr, "xerror2_cb", severity, message1, filename1, lineno1, column1, multiline_p1, message_text1, message2, filename2, lineno2, column2, multiline_p2, message_text2
87 if severity >= 1:
88 raise ValueError(message_text1)
89
90
91
92 gpo = None
93
94
95 names = ['gettextpo', 'libgettextpo']
96 for name in names:
97 lib_location = ctypes.util.find_library(name)
98 if lib_location:
99 gpo = cdll.LoadLibrary(lib_location)
100 if gpo:
101 break
102 else:
103
104
105 try:
106 gpo = cdll.LoadLibrary('libgettextpo.so')
107 except OSError, e:
108 raise ImportError("gettext PO library not found")
109
110
111
112 gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
113 gpo.po_file_write_v2.argtypes = [c_int, STRING, POINTER(po_xerror_handler)]
114 gpo.po_file_write_v2.retype = c_int
115
116
117 gpo.po_file_domain_header.restype = STRING
118 gpo.po_header_field.restype = STRING
119 gpo.po_header_field.argtypes = [STRING, STRING]
120
121
122 gpo.po_filepos_file.restype = STRING
123 gpo.po_message_filepos.restype = c_int
124 gpo.po_message_filepos.argtypes = [c_int, c_int]
125 gpo.po_message_add_filepos.argtypes = [c_int, STRING, c_size_t]
126
127
128 gpo.po_message_comments.restype = STRING
129 gpo.po_message_extracted_comments.restype = STRING
130 gpo.po_message_prev_msgctxt.restype = STRING
131 gpo.po_message_prev_msgid.restype = STRING
132 gpo.po_message_prev_msgid_plural.restype = STRING
133 gpo.po_message_is_format.restype = c_int
134 gpo.po_message_is_format.argtypes = [c_int, STRING]
135 gpo.po_message_set_format.argtypes = [c_int, STRING, c_int]
136 gpo.po_message_msgctxt.restype = STRING
137 gpo.po_message_msgid.restype = STRING
138 gpo.po_message_msgid_plural.restype = STRING
139 gpo.po_message_msgstr.restype = STRING
140 gpo.po_message_msgstr_plural.restype = STRING
141
142
143 gpo.po_message_set_comments.argtypes = [c_int, STRING]
144 gpo.po_message_set_extracted_comments.argtypes = [c_int, STRING]
145 gpo.po_message_set_fuzzy.argtypes = [c_int, c_int]
146 gpo.po_message_set_msgctxt.argtypes = [c_int, STRING]
147
148
149 xerror_handler = po_xerror_handler()
150 xerror_handler.xerror = xerror_prototype(xerror_cb)
151 xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)
152
153
156
157
160
161
164
165
167 """Returns the libgettextpo version
168
169 @rtype: three-value tuple
170 @return: libgettextpo version in the following format::
171 (major version, minor version, subminor version)
172 """
173 libversion = c_long.in_dll(gpo, 'libgettextpo_version')
174 major = libversion.value >> 16
175 minor = libversion.value >> 8
176 subminor = libversion.value - (major << 16) - (minor << 8)
177 return major, minor, subminor
178
179
180 -class pounit(pocommon.pounit):
181
182 - def __init__(self, source=None, encoding='utf-8', gpo_message=None):
183 self._rich_source = None
184 self._rich_target = None
185 self._encoding = encoding or 'utf-8'
186 if not gpo_message:
187 self._gpo_message = gpo.po_message_create()
188 if source or source == "":
189 self.source = source
190 self.target = ""
191 elif gpo_message:
192 self._gpo_message = gpo_message
193 self.infer_state()
194
205
210 msgid_plural = property(None, setmsgid_plural)
211
213
214 def remove_msgid_comments(text):
215 if not text:
216 return text
217 if text.startswith("_:"):
218 remainder = re.search(r"_: .*\n(.*)", text)
219 if remainder:
220 return remainder.group(1)
221 else:
222 return u""
223 else:
224 return text
225 singular = remove_msgid_comments((gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding))
226 if singular:
227 if self.hasplural():
228 multi = multistring(singular, self._encoding)
229 pluralform = (gpo.po_message_msgid_plural(self._gpo_message) or "").decode(self._encoding)
230 multi.strings.append(pluralform)
231 return multi
232 else:
233 return singular
234 else:
235 return u""
236
249 source = property(getsource, setsource)
250
252 if self.hasplural():
253 plurals = []
254 nplural = 0
255 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
256 while plural:
257 plurals.append(plural.decode(self._encoding))
258 nplural += 1
259 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
260 if plurals:
261 multi = multistring(plurals, encoding=self._encoding)
262 else:
263 multi = multistring(u"")
264 else:
265 multi = (gpo.po_message_msgstr(self._gpo_message) or "").decode(self._encoding)
266 return multi
267
269
270 if self.hasplural():
271 if isinstance(target, multistring):
272 target = target.strings
273 elif isinstance(target, basestring):
274 target = [target]
275
276 elif isinstance(target, (dict, list)):
277 if len(target) == 1:
278 target = target[0]
279 else:
280 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
281
282
283
284
285
286 if isinstance(target, (dict, list)):
287 i = 0
288 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
289 while message is not None:
290 gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
291 i += 1
292 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
293
294 if isinstance(target, list):
295 for i in range(len(target)):
296 targetstring = target[i]
297 if isinstance(targetstring, unicode):
298 targetstring = targetstring.encode(self._encoding)
299 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
300
301 elif isinstance(target, dict):
302 for i, targetstring in enumerate(target.itervalues()):
303 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
304
305 else:
306 if isinstance(target, unicode):
307 target = target.encode(self._encoding)
308 if target is None:
309 gpo.po_message_set_msgstr(self._gpo_message, "")
310 else:
311 gpo.po_message_set_msgstr(self._gpo_message, target)
312 target = property(gettarget, settarget)
313
315 """The unique identifier for this unit according to the convensions in
316 .mo files."""
317 id = (gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding)
318
319
320
321
322
323
324
325 context = gpo.po_message_msgctxt(self._gpo_message)
326 if context:
327 id = u"%s\04%s" % (context.decode(self._encoding), id)
328 return id
329
331 if origin == None:
332 comments = gpo.po_message_comments(self._gpo_message) + \
333 gpo.po_message_extracted_comments(self._gpo_message)
334 elif origin == "translator":
335 comments = gpo.po_message_comments(self._gpo_message)
336 elif origin in ["programmer", "developer", "source code"]:
337 comments = gpo.po_message_extracted_comments(self._gpo_message)
338 else:
339 raise ValueError("Comment type not valid")
340
341 if comments and get_libgettextpo_version() < (0, 17, 0):
342 comments = "\n".join([line for line in comments.split("\n")])
343
344 return comments[:-1].decode(self._encoding)
345
346 - def addnote(self, text, origin=None, position="append"):
347
348 if not (text and text.strip()):
349 return
350 text = data.forceunicode(text)
351 oldnotes = self.getnotes(origin)
352 newnotes = None
353 if oldnotes:
354 if position == "append":
355 newnotes = oldnotes + "\n" + text
356 elif position == "merge":
357 if oldnotes != text:
358 oldnoteslist = oldnotes.split("\n")
359 for newline in text.split("\n"):
360 newline = newline.rstrip("\r")
361
362 if newline not in oldnotes or len(newline) < 5:
363 oldnoteslist.append(newline)
364 newnotes = "\n".join(oldnoteslist)
365 else:
366 newnotes = text + '\n' + oldnotes
367 else:
368 newnotes = "\n".join([line.rstrip("\r") for line in text.split("\n")])
369
370 if newnotes:
371 newlines = []
372 needs_space = get_libgettextpo_version() < (0, 17, 0)
373 for line in newnotes.split("\n"):
374 if line and needs_space:
375 newlines.append(" " + line)
376 else:
377 newlines.append(line)
378 newnotes = "\n".join(newlines).encode(self._encoding)
379 if origin in ["programmer", "developer", "source code"]:
380 gpo.po_message_set_extracted_comments(self._gpo_message, newnotes)
381 else:
382 gpo.po_message_set_comments(self._gpo_message, newnotes)
383
385 gpo.po_message_set_comments(self._gpo_message, "")
386
388 newpo = self.__class__()
389 newpo._gpo_message = self._gpo_message
390 return newpo
391
392 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
426
428
429
430 return self.getid() == "" and len(self.target) > 0
431
434
437
440
447
449 return gpo.po_message_is_fuzzy(self._gpo_message)
450
452 gpo.po_message_set_fuzzy(self._gpo_message, present)
453
455
456
457 gpo.po_message_set_obsolete(self._gpo_message, True)
458 self.infer_state()
459
461 gpo.po_message_set_obsolete(self._gpo_message, False)
462 self.infer_state()
463
465 return gpo.po_message_msgid_plural(self._gpo_message) is not None
466
478
482 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
483
485 pf = pofile(noheader=True)
486 pf.addunit(self)
487 return str(pf)
488
490 locations = []
491 i = 0
492 location = gpo.po_message_filepos(self._gpo_message, i)
493 while location:
494 locname = gpo.po_filepos_file(location)
495 locline = gpo.po_filepos_start_line(location)
496 if locline == -1:
497 locstring = locname
498 else:
499 locstring = locname + ":" + str(locline)
500 locations.append(urllib.unquote_plus(locstring))
501 i += 1
502 location = gpo.po_message_filepos(self._gpo_message, i)
503 return locations
504
506 if location.find(" ") != -1:
507 location = urllib.quote_plus(location)
508 parts = location.split(":")
509 file = parts[0]
510 if len(parts) == 2:
511 line = int(parts[1] or "0")
512 else:
513 line = -1
514 gpo.po_message_add_filepos(self._gpo_message, file, line)
515
516 - def getcontext(self):
517 msgctxt = gpo.po_message_msgctxt(self._gpo_message)
518 if msgctxt:
519 return msgctxt.decode(self._encoding)
520 else:
521 msgidcomment = self._extract_msgidcomments()
522 return msgidcomment
523
524 - def setcontext(self, context):
525 context = data.forceunicode(context)
526 gpo.po_message_set_msgctxt(self._gpo_message, context)
527
562 buildfromunit = classmethod(buildfromunit)
563
564
565 -class pofile(pocommon.pofile):
566 UnitClass = pounit
567
568 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit, noheader=False):
569 self._gpo_memory_file = None
570 self._gpo_message_iterator = None
571 self.units = []
572 self.sourcelanguage = None
573 self.targetlanguage = None
574 self._encoding = 'utf-8'
575 if inputfile is None:
576 self._gpo_memory_file = gpo.po_file_create()
577 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
578 if not noheader:
579 self.init_headers()
580 else:
581 super(pofile, self).__init__(inputfile=inputfile, encoding=encoding)
582
583 - def addunit(self, unit, new=True):
584 if new:
585 gpo.po_message_insert(self._gpo_message_iterator, unit._gpo_message)
586 super(pofile, self).addunit(unit)
587
589 header._store = self
590 self.units.insert(0, header)
591 gpo.po_message_iterator_free(self._gpo_message_iterator)
592 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
593 gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
594 while gpo.po_next_message(self._gpo_message_iterator):
595 pass
596
598 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
599
600
601 id_dict = {}
602 uniqueunits = []
603
604
605 markedpos = []
606
607 def addcomment(thepo):
608 thepo.msgidcomment = " ".join(thepo.getlocations())
609 markedpos.append(thepo)
610 for thepo in self.units:
611 id = thepo.getid()
612 if thepo.isheader() and not thepo.getlocations():
613
614 uniqueunits.append(thepo)
615 elif id in id_dict:
616 if duplicatestyle == "merge":
617 if id:
618 id_dict[id].merge(thepo)
619 else:
620 addcomment(thepo)
621 uniqueunits.append(thepo)
622 elif duplicatestyle == "msgctxt":
623 origpo = id_dict[id]
624 if origpo not in markedpos:
625 gpo.po_message_set_msgctxt(origpo._gpo_message, " ".join(origpo.getlocations()))
626 markedpos.append(thepo)
627 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
628 uniqueunits.append(thepo)
629 else:
630 if not id:
631 if duplicatestyle == "merge":
632 addcomment(thepo)
633 else:
634 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
635 id_dict[id] = thepo
636 uniqueunits.append(thepo)
637 new_gpo_memory_file = gpo.po_file_create()
638 new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
639 for unit in uniqueunits:
640 gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
641 gpo.po_message_iterator_free(self._gpo_message_iterator)
642 self._gpo_message_iterator = new_gpo_message_iterator
643 self._gpo_memory_file = new_gpo_memory_file
644 self.units = uniqueunits
645
647
648 def obsolete_workaround():
649
650
651
652 for unit in self.units:
653 if unit.isobsolete():
654 gpo.po_message_set_extracted_comments(unit._gpo_message, "")
655 location = gpo.po_message_filepos(unit._gpo_message, 0)
656 while location:
657 gpo.po_message_remove_filepos(unit._gpo_message, 0)
658 location = gpo.po_message_filepos(unit._gpo_message, 0)
659 outputstring = ""
660 if self._gpo_memory_file:
661 obsolete_workaround()
662 f, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
663 os.close(f)
664 self._gpo_memory_file = gpo.po_file_write_v2(self._gpo_memory_file, fname, xerror_handler)
665 f = open(fname)
666 outputstring = f.read()
667 f.close()
668 os.remove(fname)
669 return outputstring
670
672 """Returns True if the object doesn't contain any translation units."""
673 if len(self.units) == 0:
674 return True
675
676 if self.units[0].isheader():
677 units = self.units[1:]
678 else:
679 units = self.units
680
681 for unit in units:
682 if not unit.isblank() and not unit.isobsolete():
683 return False
684 return True
685
687 if hasattr(input, 'name'):
688 self.filename = input.name
689 elif not getattr(self, 'filename', ''):
690 self.filename = ''
691
692 if hasattr(input, "read"):
693 posrc = input.read()
694 input.close()
695 input = posrc
696
697 needtmpfile = not os.path.isfile(input)
698 if needtmpfile:
699
700 fd, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
701 os.write(fd, input)
702 input = fname
703 os.close(fd)
704
705 self._gpo_memory_file = gpo.po_file_read_v3(input, xerror_handler)
706 if self._gpo_memory_file is None:
707 print >> sys.stderr, "Error:"
708
709 if needtmpfile:
710 os.remove(input)
711
712 self.units = []
713
714 self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
715 if self._header:
716 charset = gpo.po_header_field(self._header, "Content-Type")
717 if charset:
718 charset = re.search("charset=([^\\s]+)", charset).group(1)
719 self._encoding = encodingToUse(charset)
720 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
721 newmessage = gpo.po_next_message(self._gpo_message_iterator)
722 while newmessage:
723 newunit = pounit(gpo_message=newmessage, encoding=self._encoding)
724 self.addunit(newunit, new=False)
725 newmessage = gpo.po_next_message(self._gpo_message_iterator)
726 self._free_iterator()
727
729
730
731 return
732 self._free_iterator()
733 if self._gpo_memory_file is not None:
734 gpo.po_file_free(self._gpo_memory_file)
735 self._gpo_memory_file = None
736
738
739
740 return
741 if self._gpo_message_iterator is not None:
742 gpo.po_message_iterator_free(self._gpo_message_iterator)
743 self._gpo_message_iterator = None
744