Package translate :: Package storage :: Module fpo
[hide private]
[frames] | [no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2011 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  import re 
 32  import copy 
 33  import cStringIO 
 34  import urllib 
 35   
 36  from translate.lang import data 
 37  from translate.misc.multistring import multistring 
 38  from translate.storage import pocommon, base, cpo, poparser 
 39  from translate.storage.pocommon import encodingToUse 
 40   
# Separator placed between the individual entries of a "#:" (source
# location) comment line.
lsep = " "
"""Seperator for #: entries"""

# Raw PO text of a minimal header entry: an empty msgid whose msgstr declares
# UTF-8 content and an 8bit transfer encoding.  It is a raw string, so the
# "\n" sequences are a literal backslash followed by "n", i.e. PO escapes.
basic_header = r'''msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
'''
 49   
 50   
class pounit(pocommon.pounit):
    """A PO unit that keeps its data in plain Python attributes.

    Comments are stored as lists of lines (C{othercomments},
    C{automaticcomments}, C{sourcecomments}, C{typecomments}); the string
    form of the unit is produced by converting it to a C{cpo.pounit} (see
    L{__str__}).
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit with blank comments, context and target.

        @param source: The msgid, handed to C{pocommon.pounit.__init__}.
        @param encoding: Encoding name; stored after being passed through
            C{encodingToUse}.
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        self.target = u""

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        @param blankall: When true, reset every comment list and the msgid
            comment to empty values; when false nothing is touched.
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Return the stored msgid."""
        return self._source

    def setsource(self, source):
        """Store the msgid, coercing falsy values to an empty unicode string.

        Unicode (and multistring) values are stored as they are; anything
        else left after C{data.forceunicode} is wrapped in a multistring.
        """
        self._rich_source = None
        # assert isinstance(source, unicode)
        source = data.forceunicode(source or u"")
        source = source or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            #unicode, list, dict
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        For a plural unit the value is stored as a multistring.  For a
        singular unit a one-element list/dict is unwrapped.

        @raise ValueError: If the unit has no plural but C{target} is a
            list or dict whose length is not exactly one.
        """
        self._rich_target = None
        # assert isinstance(target, unicode)
        # target = data.forceunicode(target)
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                #unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                # NOTE(review): for a dict this only works when its single
                # key is 0 - confirm what callers pass.
                self._target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: C{None} for translator notes followed by automatic
            notes, C{"translator"} for translator notes only, or one of
            C{"programmer"}, C{"developer"}, C{"source code"} for automatic
            notes only.
        @return: The selected comment lines joined with newlines.
        @raise ValueError: If C{origin} is any other value.
        """
        if origin is None:
            # Join both groups as one list so that the last translator note
            # and the first automatic note do not end up on the same line.
            comments = u"\n".join(self.othercomments + self.automaticcomments)
        elif origin == "translator":
            comments = u"\n".join(self.othercomments)
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"\n".join(self.automaticcomments)
        else:
            raise ValueError("Comment type not valid")
        return comments

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; empty or whitespace-only text is ignored.
            One trailing newline is dropped and the rest is split into
            lines.
        @param origin: C{"programmer"}, C{"developer"} or C{"source code"}
            select the automatic comments; anything else selects the
            translator comments.
        @param position: C{"append"} or C{"prepend"} relative to the
            existing lines.  Any other value makes the new lines replace
            the existing ones.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        autocomments = origin in ["programmer", "developer", "source code"]
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        if text.endswith(u'\n'):
            text = text[:-1]
        newcomments = text.split(u"\n")
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep-copy the unit, sharing the attributes named in __shallow__.

        @param memo: The memo dictionary of the C{copy.deepcopy} protocol.
            A fresh one is created per call when omitted, so no state is
            shared between unrelated copies.
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that any reference back
        # to this unit resolves to the copy instead of being copied again.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a copy made with C{copy.deepcopy} (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the length of the msgid; for plurals, of all forms joined."""
        if self.hasplural():
            return len("".join(self.source.strings))
        return len(self.source)

    def _msgstrlen(self):
        """Return the length of the msgstr; for plurals, of all forms joined."""
        if self.hasplural():
            return len("".join(self.target.strings))
        return len(self.target)

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: Unit to merge from.  If it is not a L{pounit} the
            parent class's merge is used instead.
        @param authoritative: When true, automatic comments and source
            locations of C{otherpo} are not merged in.
        """

        def mergelists(list1, list2, split=False):
            """Merge the items of list2 into list1 (list1 is changed in place)."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list2
            lineend = ""
            if list2 and list2[0]:
                # Later candidates win, so the two-character endings must
                # come after the single characters.  "\r\n" was missing,
                # which left a stray "\r" on CRLF-terminated items.
                for candidate in ["\n", "\r", "\n\r", "\r\n"]:
                    if list2[0].endswith(candidate):
                        lineend = candidate

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if not item in splitlist1])
            elif list1 != list2:
                #Normal merge, but conform to list1 newline style
                for item in list2:
                    item = item.rstrip(lineend)
                    # avoid duplicate comment lines (this might cause some problems)
                    if item not in list1 or len(item) < 5:
                        list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
#                mergelists(self.msgidcomments, otherpo.msgidcomments) #XXX?
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return whether this looks like a header: empty id, non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """Return whether msgid, msgstr and msgctxt are all empty.

        Headers and units carrying a msgid comment are never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and len(self._msgctxt) == 0:
            return True
        return False

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression;
        # the flag is escaped so that it is always matched literally.
        pattern = re.compile(r"\b%s\b" % re.escape(typecomment))
        for tcline in self.typecomments:
            if pattern.search(tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        # raise DeprecationWarning
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) == present:
            return
        if present:
            self.typecomments.append("#, %s\n" % typecomment)
        else:
            # this should handle word boundaries properly ...
            pattern = re.compile(r"\b%s\b[ \t,]*" % re.escape(typecomment))
            stripped = [pattern.sub("", tcline) for tcline in self.typecomments]
            # drop lines that are left with nothing but the "#," marker
            self.typecomments = [tcline for tcline in stripped if tcline.strip() != "#,"]

    def istranslated(self):
        """Return whether the unit is translated (per the parent) and not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Return whether the unit is neither a header, blank nor obsolete."""
        return not (self.isheader() or self.isblank() or self.isobsolete())

    def isfuzzy(self):
        """Return whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def _domarkfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.sourcecomments = []
        self.automaticcomments = []
        super(pounit, self).makeobsolete()

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Deprecated: always raises, units must not be parsed on their own.

        @raise DeprecationWarning: Always.
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return [urllib.unquote_plus(loc) for loc in self.sourcecomments]

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        A location containing a space is URL-quoted first so that it is
        stored as a single entry.

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        if " " in location:
            location = urllib.quote_plus(location)
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: If given (and non-empty), extract the comment from this
            text instead of returning the unit's stored msgid comment.
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def setcontext(self, context):
        """Set the msgctxt, coercing falsy values to an empty unicode string."""
        context = data.forceunicode(context or u"")
        self._msgctxt = context

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    @classmethod
    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        if type(unit) is cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            #context
            newunit.msgidcomment = unit._extract_msgidcomments()
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            newunit.markfuzzy(unit.isfuzzy())
            if unit.isobsolete():
                newunit.makeobsolete()
            # Only the first of these format flags found is carried over.
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            # NOTE(review): this calls the base implementation through
            # base.TranslationUnit rather than through cls - confirm that
            # the resulting unit type is the intended one.
            return base.TranslationUnit.buildfromunit(unit)
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def changeencoding(self, newencoding):
        """Deprecated: changes the encoding on the file.

        @raise DeprecationWarning: Always; the method no longer does
            anything else.
        """
        # This should not be here but in poheader. It also shouldn't mangle the
        # header itself, but use poheader methods. All users are removed, so
        # we can deprecate after one release.
        # (The former implementation followed the raise and was unreachable.)
        raise DeprecationWarning

    def _build_self_from_cpo(self):
        """Builds up this store from the internal cpo store.

        A user must ensure that self._cpo_store already exists, and that it is
        deleted afterwards."""
        for unit in self._cpo_store.units:
            self.addunit(self.UnitClass.buildfromunit(unit))
        self._encoding = self._cpo_store._encoding

    def _build_cpo_from_self(self):
        """Builds the internal cpo store from the data in self.

        Blank units are skipped, and a header is generated if the resulting
        store has none.

        A user must ensure that self._cpo_store does not exist, and should
        delete it after using it."""
        self._cpo_store = cpo.pofile(noheader=True)
        for unit in self.units:
            if not unit.isblank():
                self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit, self._encoding))
        if not self._cpo_store.header():
            #only add a temporary header
            self._cpo_store.makeheader(charset=self._encoding, encoding="8bit")

    def parse(self, input):
        """Parses the given file or file source string.

        The input is parsed by a temporary cpo store, from which the units
        of this store are then built.

        @param input: File-like object or source string.  If it has a
            C{name} attribute that becomes C{self.filename}.
        @raise base.ParseError: Wrapping any exception raised while parsing.
        """
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            self.units = []
            self._cpo_store = cpo.pofile(input, noheader=True)
            try:
                self._build_self_from_cpo()
            finally:
                # Never leave the temporary store behind, even on failure.
                del self._cpo_store
        except Exception as e:
            raise base.ParseError(e)

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: C{"merge"} merges a duplicate into the first
            unit with the same id; C{"msgctxt"} keeps duplicates and
            appends each unit's locations to its msgctxt.
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        # NOTE(review): with any other duplicatestyle a unit whose id was
        # already seen is dropped without being merged - confirm intent.
        id_dict = {}
        uniqueunits = []
        # Units that have already been disambiguated.  They are tracked by
        # identity: units are not hashable and two distinct duplicates must
        # not be mistaken for each other.
        marked = set()

        def addcomment(thepo):
            thepo.msgidcomment = " ".join(thepo.getlocations())
            marked.add(id(thepo))

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    if id(origpo) not in marked:
                        origpo._msgctxt += " ".join(origpo.getlocations())
                        # Mark the original, so that further duplicates do
                        # not append its locations a second time.
                        marked.add(id(origpo))
                    thepo._msgctxt += " ".join(thepo.getlocations())
                    uniqueunits.append(thepo)
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo._msgctxt += u" ".join(thepo.getlocations())
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here

        If the units cannot be encoded in the current encoding, the store
        switches to UTF-8, updates the header and tries once more.
        """
        # NOTE(review): _build_cpo_from_self() replaces this store at once;
        # the assignment is kept in case constructing it matters - confirm.
        self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True)
        try:
            try:
                self._build_cpo_from_self()
            except UnicodeEncodeError:
                self._encoding = "utf-8"
                self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
                self._build_cpo_from_self()
            output = str(self._cpo_store)
        finally:
            # Never leave the temporary store behind, even on failure.
            del self._cpo_store
        return output
566