1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Classes for the support of Gettext .po and .pot files.
22
23 This implementation assumes that cpo is working. This should not be used
24 directly, but can be used once cpo has been established to work."""
25
26
27
28
29
30
31 import re
32 import copy
33 import cStringIO
34 import urllib
35
36 from translate.lang import data
37 from translate.misc.multistring import multistring
38 from translate.storage import pocommon, base, cpo, poparser
39 from translate.storage.pocommon import encodingToUse
40
41 lsep = " "
"""Separator for #: entries"""
43
44 basic_header = r'''msgid ""
45 msgstr ""
46 "Content-Type: text/plain; charset=UTF-8\n"
47 "Content-Transfer-Encoding: 8bit\n"
48 '''
49
50
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 __shallow__ = ['_store']
67
68 - def __init__(self, source=None, encoding="UTF-8"):
75
84
87
100 source = property(getsource, setsource)
101
103 """Returns the unescaped msgstr"""
104 return self._target
105
107 """Sets the msgstr to the given (unescaped) value"""
108 self._rich_target = None
109
110
111 if self.hasplural():
112 if isinstance(target, multistring):
113 self._target = target
114 else:
115
116 self._target = multistring(target)
117 elif isinstance(target, (dict, list)):
118 if len(target) == 1:
119 self._target = target[0]
120 else:
121 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
122 else:
123 self._target = target
124 target = property(gettarget, settarget)
125
127 """Return comments based on origin value (programmer, developer, source code and translator)"""
128 if origin == None:
129 comments = u"\n".join(self.othercomments)
130 comments += u"\n".join(self.automaticcomments)
131 elif origin == "translator":
132 comments = u"\n".join(self.othercomments)
133 elif origin in ["programmer", "developer", "source code"]:
134 comments = u"\n".join(self.automaticcomments)
135 else:
136 raise ValueError("Comment type not valid")
137 return comments
138
def addnote(self, text, origin=None, position="append"):
    """Add a note (comment) to this unit.

    This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: The note text; empty or whitespace-only text is ignored.
    @param origin: "programmer", "developer" or "source code" stores the
        note in the automatic comments; any other value stores it in the
        other (translator) comments.
    @param position: "append" adds the note after the existing comments,
        "prepend" adds it before them; any other value makes the note
        replace the existing comments.
    """
    # Nothing to record for empty or whitespace-only notes.
    if not text or not text.strip():
        return
    note = data.forceunicode(text)

    # Choose which comment list is read and later rebound.
    attribute = "othercomments"
    existing = self.othercomments
    if origin in ("programmer", "developer", "source code"):
        attribute = "automaticcomments"
        existing = self.automaticcomments

    # Drop a single trailing newline so it does not yield an empty last line.
    if note.endswith(u'\n'):
        note = note[:-1]
    lines = note.split(u"\n")

    if position == "append":
        lines = existing + lines
    elif position == "prepend":
        lines = lines + existing

    setattr(self, attribute, lines)
162
164 """Remove all the translator's notes (other comments)"""
165 self.othercomments = []
166
168
169 new_unit = self.__class__()
170
171
172 shallow = set(self.__shallow__)
173
174 for key, value in self.__dict__.iteritems():
175 if key not in shallow:
176 setattr(new_unit, key, copy.deepcopy(value))
177
178 for key in set(shallow):
179 setattr(new_unit, key, getattr(self, key))
180
181
182 memo[id(self)] = self
183
184 return new_unit
185
187 return copy.deepcopy(self)
188
194
200
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    If otherpo is not a pounit the merge is delegated to the parent class.
    When authoritative is True the automatic and source comments of
    otherpo are not merged in.
    """

    def mergelists(list1, list2, split=False):
        """Merge the entries of list2 into list1 in place."""
        # Once either list holds a unicode entry, byte strings in both lists
        # are decoded as UTF-8 (in place).
        if unicode in map(type, list2) or unicode in map(type, list1):
            for entries in (list1, list2):
                for index, entry in enumerate(entries):
                    if isinstance(entry, str):
                        entries[index] = entry.decode("utf-8")

        # Newline style of list2, taken from its first entry; the last
        # matching candidate wins.
        lineend = ""
        if list2 and list2[0]:
            matching = [candidate for candidate in ("\n", "\r", "\n\r")
                        if list2[0].endswith(candidate)]
            if matching:
                lineend = matching[-1]

        if split:
            # Compare whitespace-separated tokens instead of whole entries.
            present = []
            for entry in list1:
                present.extend(entry.split())
            incoming = []
            for entry in list2:
                incoming.extend(entry.split())
            list1.extend([token for token in incoming if token not in present])
            return

        if list1 != list2:
            for entry in list2:
                entry = entry.rstrip(lineend)
                # Entries shorter than 5 characters are always appended;
                # longer ones only when not already present.
                if len(entry) < 5 or entry not in list1:
                    list1.append(entry)

    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return

    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)

    if not self.istranslated() or overwrite:
        # Strip a "_: comment\n" prefix from the incoming translation, if any.
        # Note that this rewrites otherpo.target as well.
        if pocommon.extract_msgid_comment(otherpo.target):
            kde_comment = '_: ' + otherpo._extract_msgidcomments() + '\n'
            otherpo.target = otherpo.target.replace(kde_comment, '')
        self.target = otherpo.target
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            # Same source and context: hand otherpo's fuzzy state to markfuzzy.
            self.markfuzzy(otherpo.isfuzzy())
        return

    if not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
        return

    # Both units are translated and ours is kept: flag a disagreement.
    if self.target != otherpo.target:
        self.markfuzzy()
272
274
275 return not self.getid() and len(self.target) > 0
276
283
288
297
307
310
313
316
319
321 """Makes this unit obsolete"""
322 self.sourcecomments = []
323 self.automaticcomments = []
324 super(pounit, self).makeobsolete()
325
330
334
336 """convert to a string. double check that unicode is handled somehow here"""
337 _cpo_unit = cpo.pounit.buildfromunit(self)
338 return str(_cpo_unit)
339
341 """Get a list of locations from sourcecomments in the PO unit
342
343 rtype: List
344 return: A list of the locations with '#: ' stripped
345
346 """
347
348 return [urllib.unquote_plus(loc) for loc in self.sourcecomments]
349
351 """Add a location to sourcecomments in the PO unit
352
353 @param location: Text location e.g. 'file.c:23' does not include #:
354 @type location: String
355 """
356 if location.find(" ") != -1:
357 location = urllib.quote_plus(location)
358 self.sourcecomments.extend(location.split())
359
370
def getcontext(self):
    """Get the message context.

    The result is the msgctxt value followed directly by the msgid comment.
    """
    msgctxt, msgidcomment = self._msgctxt, self.msgidcomment
    return msgctxt + msgidcomment
374
def setcontext(self, context):
    """Set the message context (msgctxt).

    A false value such as None is stored as the empty unicode string; any
    other value is passed through data.forceunicode before being stored.
    """
    self._msgctxt = data.forceunicode(context or u"")
378
393
426 buildfromunit = classmethod(buildfromunit)
427
428
429 -class pofile(pocommon.pofile):
430 """A .po file containing various units"""
431 UnitClass = pounit
432
434 """Deprecated: changes the encoding on the file."""
435
436
437
438 raise DeprecationWarning
439
440 self._encoding = encodingToUse(newencoding)
441 if not self.units:
442 return
443 header = self.header()
444 if not header or header.isblank():
445 return
446 charsetline = None
447 headerstr = header.target
448 for line in headerstr.split("\n"):
449 if not ":" in line:
450 continue
451 key, value = line.strip().split(":", 1)
452 if key.strip() != "Content-Type":
453 continue
454 charsetline = line
455 if charsetline is None:
456 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
457 else:
458 charset = re.search("charset=([^ ]*)", charsetline)
459 if charset is None:
460 newcharsetline = charsetline
461 if not newcharsetline.strip().endswith(";"):
462 newcharsetline += ";"
463 newcharsetline += " charset=%s" % self._encoding
464 else:
465 charset = charset.group(1)
466 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
467 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
468 header.target = headerstr
469
471 """Builds up this store from the internal cpo store.
472
473 A user must ensure that self._cpo_store already exists, and that it is
474 deleted afterwards."""
475 for unit in self._cpo_store.units:
476 self.addunit(self.UnitClass.buildfromunit(unit))
477 self._encoding = self._cpo_store._encoding
478
480 """Builds the internal cpo store from the data in self.
481
482 A user must ensure that self._cpo_store does not exist, and should
483 delete it after using it."""
484 self._cpo_store = cpo.pofile(noheader=True)
485 for unit in self.units:
486 if not unit.isblank():
487 self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit, self._encoding))
488 if not self._cpo_store.header():
489
490 self._cpo_store.makeheader(charset=self._encoding, encoding="8bit")
491
493 """Parses the given file or file source string."""
494 try:
495 if hasattr(input, 'name'):
496 self.filename = input.name
497 elif not getattr(self, 'filename', ''):
498 self.filename = ''
499 tmp_header_added = False
500
501
502
503 self.units = []
504 self._cpo_store = cpo.pofile(input, noheader=True)
505 self._build_self_from_cpo()
506 del self._cpo_store
507 if tmp_header_added:
508 self.units = self.units[1:]
509 except Exception, e:
510 raise base.ParseError(e)
511
513 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
514
515
516 id_dict = {}
517 uniqueunits = []
518
519
520 markedpos = []
521
522 def addcomment(thepo):
523 thepo.msgidcomment = " ".join(thepo.getlocations())
524 markedpos.append(thepo)
525 for thepo in self.units:
526 id = thepo.getid()
527 if thepo.isheader() and not thepo.getlocations():
528
529 uniqueunits.append(thepo)
530 elif id in id_dict:
531 if duplicatestyle == "merge":
532 if id:
533 id_dict[id].merge(thepo)
534 else:
535 addcomment(thepo)
536 uniqueunits.append(thepo)
537 elif duplicatestyle == "msgctxt":
538 origpo = id_dict[id]
539 if origpo not in markedpos:
540 origpo._msgctxt += " ".join(origpo.getlocations())
541 markedpos.append(thepo)
542 thepo._msgctxt += " ".join(thepo.getlocations())
543 uniqueunits.append(thepo)
544 else:
545 if not id:
546 if duplicatestyle == "merge":
547 addcomment(thepo)
548 else:
549 thepo._msgctxt += u" ".join(thepo.getlocations())
550 id_dict[id] = thepo
551 uniqueunits.append(thepo)
552 self.units = uniqueunits
553
555 """Convert to a string. double check that unicode is handled somehow here"""
556 self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True)
557 try:
558 self._build_cpo_from_self()
559 except UnicodeEncodeError, e:
560 self._encoding = "utf-8"
561 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
562 self._build_cpo_from_self()
563 output = str(self._cpo_store)
564 del self._cpo_store
565 return output
566