Package translate :: Package storage :: Module pypo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  import copy 
 26  import cStringIO 
 27  import re 
 28  import urllib 
 29   
 30  from translate.lang import data 
 31  from translate.misc.multistring import multistring 
 32  from translate.misc import quote 
 33  from translate.misc import textwrap 
 34  from translate.storage import pocommon, base, poparser 
 35  from translate.storage.pocommon import encodingToUse 
 36   
 37  lsep = "\n#: " 
 38  """Seperator for #: entries""" 
 39   
 40  # general functions for quoting / unquoting po strings 
 41   
 42  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 43  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 44   
 45   
def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    @param line: unescaped text
    @return: the text with every character that is a key of
        po_escape_map replaced by its escape sequence
    """
    # Collect the position of every character that needs escaping; a set
    # removes positions reported more than once.
    hits = set()
    for special in po_escape_map:
        hits.update(quote.find_all(line, special))

    # Stitch the untouched stretches and the escape sequences together,
    # walking the positions from left to right.
    pieces = []
    cursor = 0
    for hit in sorted(hits):
        pieces.append(line[cursor:hit])
        pieces.append(po_escape_map[line[hit:hit + 1]])
        cursor = hit + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
def unescapehandler(escape):
    """Translate one escape sequence via po_unescape_map.

    @param escape: an escape sequence as it appears in a po file
    @return: the mapped character, or C{escape} itself when the sequence
        is not in po_unescape_map
    """
    try:
        return po_unescape_map[escape]
    except KeyError:
        return escape
def wrapline(line):
    """Wrap text for po files.

    @param line: the text to wrap
    @return: the list of wrapped lines produced by textwrap.wrap with a
        width of 76, adjusted so that no continuation line starts with
        a space
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space: move a leading space from each
    # continuation line to the end of the line before it.
    for pos in range(1, len(wrapped)):
        current = wrapped[pos]
        if current.startswith(' '):
            wrapped[pos] = current[1:]
            wrapped[pos - 1] += ' '
    return wrapped
def quoteforpo(text):
    """Quotes the given text for a PO file.

    @param text: unescaped text, or None
    @return: a list of quoted and escaped lines; an empty list when
        C{text} is None
    """
    quoted = []
    if text is None:
        return quoted

    segments = text.split("\n")
    last_segment = segments[-1]

    # Multi-line text, or a single line longer than 71 characters, is
    # written with an empty leader line ('""') -- except for the case of
    # exactly one line that merely ends in a newline.
    if len(segments) > 1 or len(segments[0]) > 71:
        if len(segments) != 2 or segments[1]:
            quoted.append('""')

    # Every segment except the last was followed by a newline in the
    # original text, so its final wrapped piece carries an escaped "\n".
    for segment in segments[:-1]:
        #TODO: We should only wrap after escaping
        wrapped = wrapline(segment)
        if len(wrapped) > 0:
            for piece in wrapped[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
        else:
            quoted.append('"\\n"')

    # Whatever follows the last newline (if anything) gets no "\n".
    if last_segment:
        for piece in wrapline(last_segment):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
def extractpoline(line):
    """Remove quote and unescape line from po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of
        quote.extractwithoutquotes, with escapes passed through
        unescapehandler
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\',
                                        includeescapes=unescapehandler)
    return result[0]
def unquotefrompo(postr):
    """Unquote a sequence of po lines and concatenate the results.

    @param postr: an iterable of quoted lines from a po file
    @return: the extracted text of all lines joined together
    """
    extracted = []
    for quotedline in postr:
        extracted.append(extractpoline(quotedline))
    return "".join(extracted)
def is_null(lst):
    """Tell whether a list of quoted po lines holds no text.

    @param lst: a list of quoted lines
    @return: True for an empty list or a list whose only element is the
        empty quoted string '""'
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
def extractstr(string):
    """Cut out the span between the first and the last double quote.

    @param string: text expected to contain a double-quoted part
    @return: the text from the first double quote up to and including
        the last one; when no double quote is found, the slice starting
        at the index returned by find() with a closing quote appended
    """
    start = string.find('"')
    end = string.rfind('"')
    if end == -1:
        # No quote at all: find() returned -1 as well, so this keeps the
        # last character (if any) and appends a closing quote.
        return string[start:] + '"'
    return string[start:end + 1]
class pounit(pocommon.pounit):
    """A single unit of a .po file.

    The unit keeps each part of the entry as a list of quoted po lines
    (or, for plural msgstr, a dict of such lists keyed by plural index)
    together with the different kinds of comments.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand C{source} to the base class.

        @param source: passed on unchanged to pocommon.pounit.__init__
        @param encoding: encoding name, normalised with encodingToUse
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        # The base initialiser runs last so that every attribute above
        # already exists when it is called.
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments.

        @param blankall: when true, reset every comment list to empty;
            when false nothing is changed
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []

    def _get_all_comments(self):
        """Return the five comment lists (not copies) in a new list."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                ]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid / msgid_plural lines.

        @param msgid: list of quoted lines holding the singular form
        @param msgid_plural: list of quoted lines holding the plural form
        @return: a multistring; the plural form is appended to its
            C{strings} when this unit has a plural
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        # NOTE(review): hasplural() looks at self.msgid_plural, not at the
        # msgid_plural argument, so for prev_source the decision is based
        # on the current plural -- confirm that this is intended.
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote a source value into msgid / msgid_plural line lists.

        @param source: a str, unicode, multistring or list of strings
        @return: a (msgid, msgid_plural) tuple of quoted line lists;
            msgid_plural is empty when only one form is given
        """
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self._rich_source = None
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            # Plural entry: one quoted line list per plural form.
            multi = multistring(map(unquotefrompo, self.msgstr.values()), self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value.

        @param target: a str, unicode, multistring, list or dict of
            plural forms
        @raise ValueError: when the unit has no plural but a list or dict
            with other than exactly one element is given
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # multistring must be tested before basestring here.
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getalttrans(self):
        """Return a list of alternate units.

        Previous msgid and current msgstr is combined to form a single
        alternative unit."""
        prev_source = self.prev_source
        if prev_source and self.isfuzzy():
            unit = type(self)(prev_source)
            unit.target = self.target
            # Already released versions of Virtaal (0.6.x) only supported XLIFF
            # alternatives, and expect .xmlelement.get().
            # This can be removed soon:
            unit.xmlelement = dict()
            return [unit]
        return []

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer,
        source code and translator).

        @param origin: None for translator and automatic comments
            together, "translator" for othercomments only, or one of
            "programmer", "developer", "source code" for
            automaticcomments only
        @return: the comment text with the leading comment markers
            removed and the last character (the final newline) dropped
        @raise ValueError: for any other origin value
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; empty or whitespace-only text is ignored
        @param origin: "programmer", "developer" or "source code" stores
            the note as an automatic comment ("#. "); anything else
            stores it as a translator comment ("# ")
        @param position: "append" or "prepend" relative to the existing
            comments; with any other value the new lines replace them
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        newcomments = [linestart + line + "\n" for line in text]
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy the unit, sharing the attributes named in __shallow__.

        @param memo: the memo dictionary of the running copy.deepcopy
            call; a fresh one is created when omitted
        @return: a new instance of the same class
        """
        # A dict literal as default would be shared between all calls.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that any further
        # reference to this unit met during the same deepcopy run
        # resolves to the copy instead of being copied again.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see __deepcopy__)."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the unquoted msgid, plus msgid_plural if present."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Length of the unquoted msgstr.

        For a plural msgstr the non-empty forms are joined with a
        newline before measuring.
        """
        if isinstance(self.msgstr, dict):
            forms = [unquotefrompo(msgstr) for msgstr in self.msgstr.itervalues()]
            combinedstr = "\n".join([form for form in forms if form])
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: the unit to merge in; when it is not a pounit the
            merge is delegated to the base class
        @param overwrite: replace an existing translation
        @param comments: merge the comment lists as well
        @param authoritative: when true, automatic comments, msgid
            comments and source comments of otherpo are not brought
            across
        """

        def mergelists(list1, list2, split=False):
            """Extend list1 in place with the entries of list2 it lacks."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # The last matching candidate wins.
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                # The first word of each line is its comment marker, the
                # remaining words are the individual entries.
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if item not in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Tell whether this unit looks like the po header: empty msgid
        and msgctxt, no msgid comments, and a non-empty msgstr."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt))

    def isblank(self):
        """Tell whether msgid, msgstr and msgctxt are all empty.

        A header, or a unit with msgid comments, is never blank.
        """
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                if len(self.typecomments):
                    # There is already a comment, so we have to add onto it
                    self.typecomments[0] = "%s, %s\n" % (self.typecomments[0][:-1], typecomment)
                else:
                    self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # Drop lines that are left with nothing but the marker.
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Tell whether the unit carries the 'fuzzy' type comment."""
        return self.hastypecomment('fuzzy')

    def markfuzzy(self, present=True):
        """Set the unit's state to fuzzy, untranslated or translated.

        @param present: true selects the fuzzy state; otherwise the state
            is untranslated when the msgstr is empty and translated when
            it is not
        """
        if present:
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif (self.hasplural() and not self._msgstrlen()) or is_null(self.msgstr):
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])

    def _domarkfuzzy(self, present=True):
        """Add or remove the 'fuzzy' type comment."""
        self.settypecomment("fuzzy", present)

    def infer_state(self):
        """Derive the state from the obsolete flag and the fuzzy comment."""
        if self.obsolete:
            self.makeobsolete()
        else:
            self.markfuzzy(self.hastypecomment('fuzzy'))

    def isobsolete(self):
        """Return the obsolete flag."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        super(pounit, self).makeobsolete()
        self.obsolete = True
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        super(pounit, self).resurrect()
        self.obsolete = False

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse C{src} (a string) into this unit using poparser."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Render one part (msgctxt, msgid, msgstr, ...) as po text.

        @param partname: the keyword to put in front of the part
        @param partlines: list of quoted lines, or a dict of such lists
            keyed by plural index
        @param partcomments: quoted msgid comment lines to emit inside
            the part
        @return: the rendered text, every line ending in a newline
        """
        if isinstance(partlines, dict):
            # Plural forms: render each as partname[index], in key order.
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments)
                            for partkey in sorted(partlines.keys())])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""

        def add_prev_msgid_lines(lines, prefix, header, var):
            """Append the previous-value lines of one part, if any."""
            if len(var) > 0:
                lines.append("%s %s %s\n" % (prefix, header, var[0]))
                lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

        def add_prev_msgid_info(lines, prefix):
            """Append previous msgctxt, msgid and msgid_plural lines."""
            add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            add_prev_msgid_info(obsoletelines, prefix="#~|")
            if self.msgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.msgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.msgid, self.msgidcomments))
            if self.msgid_plural or self.msgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.msgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            return u"".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return u"".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines, prefix="#|")
        if self.msgctxt:
            lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
        postr = u"".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        # Undo the quoting applied by addlocation.
        for i, loc in enumerate(locations):
            locations[i] = urllib.unquote_plus(loc)
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        # Locations are split on whitespace when read back, so one that
        # contains a space is stored URL-quoted.
        if location.find(" ") != -1:
            location = urllib.quote_plus(location)
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract from; when empty or omitted, the
            unit's own msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgid comments with one KDE style comment, or
        clear them when C{msgidcomment} is empty."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def setcontext(self, context):
        """Set msgctxt to the quoted form of C{context}."""
        context = data.forceunicode(context)
        self.msgctxt = quoteforpo(context)

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        unit_id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomments:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id
701 -class pofile(pocommon.pofile):
702 """A .po file containing various units""" 703 UnitClass = pounit 704
705 - def parse(self, input):
706 """Parses the given file or file source string.""" 707 if True: 708 # try: 709 if hasattr(input, 'name'): 710 self.filename = input.name 711 elif not getattr(self, 'filename', ''): 712 self.filename = '' 713 if isinstance(input, str): 714 input = cStringIO.StringIO(input) 715 # clear units to get rid of automatically generated headers before parsing 716 self.units = [] 717 poparser.parse_units(poparser.ParseState(input, pounit), self)
718 # except Exception, e: 719 # raise base.ParseError(e) 720
721 - def removeduplicates(self, duplicatestyle="merge"):
722 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 723 # TODO: can we handle consecutive calls to removeduplicates()? What 724 # about files already containing msgctxt? - test 725 id_dict = {} 726 uniqueunits = [] 727 # TODO: this is using a list as the pos aren't hashable, but this is slow. 728 # probably not used frequently enough to worry about it, though. 729 markedpos = [] 730 731 def addcomment(thepo): 732 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 733 markedpos.append(thepo)
734 for thepo in self.units: 735 id = thepo.getid() 736 if thepo.isheader() and not thepo.getlocations(): 737 # header msgids shouldn't be merged... 738 uniqueunits.append(thepo) 739 elif id in id_dict: 740 if duplicatestyle == "merge": 741 if id: 742 id_dict[id].merge(thepo) 743 else: 744 addcomment(thepo) 745 uniqueunits.append(thepo) 746 elif duplicatestyle == "msgctxt": 747 origpo = id_dict[id] 748 if origpo not in markedpos: 749 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 750 markedpos.append(thepo) 751 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 752 uniqueunits.append(thepo) 753 else: 754 if not id: 755 if duplicatestyle == "merge": 756 addcomment(thepo) 757 else: 758 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 759 id_dict[id] = thepo 760 uniqueunits.append(thepo) 761 self.units = uniqueunits
762
763 - def __str__(self):
764 """Convert to a string. double check that unicode is handled somehow here""" 765 output = self._getoutput() 766 if isinstance(output, unicode): 767 try: 768 return output.encode(getattr(self, "_encoding", "UTF-8")) 769 except UnicodeEncodeError, e: 770 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8") 771 self._encoding = "UTF-8" 772 for unit in self.units: 773 unit._encoding = "UTF-8" 774 return self._getoutput().encode("UTF-8") 775 776 return output
777
778 - def _getoutput(self):
779 """convert the units back to lines""" 780 lines = [] 781 for unit in self.units: 782 unitsrc = unit._getoutput() + u"\n" 783 lines.append(unitsrc) 784 lines = u"".join(lines).rstrip() 785 #After the last pounit we will have \n\n and we only want to end in \n: 786 if lines: 787 lines += u"\n" 788 return lines
789
790 - def encode(self, lines):
791 """encode any unicode strings in lines in self._encoding""" 792 newlines = [] 793 encoding = self._encoding 794 if encoding is None or encoding.lower() == "charset": 795 encoding = 'UTF-8' 796 for line in lines: 797 if isinstance(line, unicode): 798 line = line.encode(encoding) 799 newlines.append(line) 800 return newlines
801
802 - def decode(self, lines):
803 """decode any non-unicode strings in lines with self._encoding""" 804 newlines = [] 805 for line in lines: 806 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 807 try: 808 line = line.decode(self._encoding) 809 except UnicodeError, e: 810 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 811 newlines.append(line) 812 return newlines
813
814 - def unit_iter(self):
815 for unit in self.units: 816 if not (unit.isheader() or unit.isobsolete()): 817 yield unit
818