Package translate :: Package storage :: Module properties
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.properties

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Classes that hold units of .properties, and similar, files that are used in 
 23     translating Java, Mozilla, MacOS and other software. 
 24   
 25     The L{propfile} class is a monolingual class with L{propunit} providing unit 
 26     level access. 
 27   
 28     The .properties store has become a general key value pair class with 
 29     L{Dialect} providing the ability to change the behaviour of the parsing 
 30     and handling of the various dialects. 
 31   
 32     Currently we support:: 
 33       * Java .properties 
 34       * Mozilla .properties 
 35       * Adobe Flex files 
 36       * MacOS X .strings files 
 37       * Skype .lang files 
 38   
 39   
 40     Dialects 
 41     ======== 
 42     The following provides references and descriptions of the various dialects supported:: 
 43   
 44     Java 
 45     ---- 
 46     Java .properties are supported completely except for the ability to drop 
 47     pairs that are not translated. 
 48   
 49     The following U{.properties file 
 50     description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>} 
 51     and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give 
 52     some good references to the .properties specification. 
 53   
 54     Properties files may also hold Java 
 55     U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>} 
 56     messages.  No special handling is provided in this storage class for 
 57     MessageFormat, but this may be implemented in future. 
 58   
 59     All delimiter types, comments, line continuations and spaces handling in 
 60     delimiters are supported. 
 61   
 62     Mozilla 
 63     ------- 
 64     Mozilla files use '=' as a delimiter, are UTF-8 encoded and thus don't need \\u 
 65     escaping.  Any \\U values will be converted to correct Unicode characters. 
 66   
 67     Strings 
 68     ------- 
 69     Mac OS X strings files are implemented using 
 70     U{these<http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html>} 
 71     U{two<http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html>} 
 72     articles as references. 
 73   
 74     Flex 
 75     ---- 
 76     Adobe Flex files seem to be normal .properties files but in UTF-8 just like 
 77     Mozilla files. This 
 78     U{page<http://livedocs.adobe.com/flex/3/html/help.html?content=l10n_3.html>} 
 79     provides the information used to implement the dialect. 
 80   
 81     Skype 
 82     ----- 
 83     Skype .lang files seem to be UTF-16 encoded .properties files. 
 84   
 85     Implementation 
 86     ============== 
 87   
 88     A simple summary of what is permissible follows. 
 89   
 90     Comments supported:: 
 91       # a comment 
 92       ! a comment 
 93       // a comment (only at the beginning of a line) 
 94       /* a comment (not across multiple lines) */ 
 95   
 96     Name and Value pairs:: 
 97       # Delimiters 
 98       key = value 
 99       key : value 
100       key value 
101   
102       # Space in key and around value 
103       \ key\ = \ value 
104   
105       # Note that the b and c are escaped for epydoc rendering 
106       b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123 
107       c = a string with a continuation line \\ 
108           continuation line 
109   
110       # Special cases 
111       # key with no value 
112       key 
113       # value no key (extractable in prop2po but not mergeable in po2prop) 
114       =value 
115   
116       # .strings specific 
117       "key" = "value"; 
118  '" 
119  """ 
120   
121  import re 
122  import warnings 
123  import logging 
124   
125  from translate.lang import data 
126  from translate.misc import quote 
127  from translate.misc.typecheck import accepts, returns, IsOneOf 
128  from translate.storage import base 
129   
130  # the rstripeol calls cope with both DOS and Unix line endings, so either 
131  # kind of input can be read; output will be appropriate for the platform 
132   
133  eol = "\n" 
@accepts(unicode, [unicode])
@returns(IsOneOf(type(None), unicode), int)
def _find_delimiter(line, delimiters):
    """Find the type and position of the delimiter in a property line.

    Property files can be delimited by "=", ":" or whitespace (space for now).
    We find the position of the first unescaped occurrence of each delimiter,
    then pick the one that appears first.  A space only wins over "=" or ":"
    when there is non-whitespace text between the space and that delimiter.

    @param line: A properties line
    @type line: unicode
    @param delimiters: valid delimiters
    @type delimiters: list
    @return: delimiter character and offset within L{line}; (None, -1) when
    the line holds no delimiter
    @rtype: Tuple (delimiter char, Offset Integer)
    """
    # Leading whitespace can never act as the delimiter, so search after it.
    prewhitespace = len(line) - len(line.lstrip())

    # Find the position of the first unescaped occurrence of each delimiter
    positions = {}
    for delimiter in delimiters:
        pos = line.find(delimiter, prewhitespace)
        # A delimiter preceded by a backslash is escaped and belongs to the
        # key.  A delimiter at offset 0 has nothing in front of it, so it must
        # not be compared against line[-1] (the *last* character).
        while pos > 0 and line[pos - 1] == u"\\":
            pos = line.find(delimiter, pos + 1)
        positions[delimiter] = pos

    # Find the first delimiter other than space
    mindelimiter = None
    minpos = -1
    for delimiter in delimiters:
        pos = positions[delimiter]
        if pos == -1 or delimiter == u" ":
            continue
        if minpos == -1 or pos < minpos:
            minpos = pos
            mindelimiter = delimiter

    spacepos = positions.get(u" ", -1)
    if mindelimiter is None and spacepos != -1:
        # Use space delimiter if we found nothing else
        return (u" ", spacepos)
    if mindelimiter is not None and spacepos != -1 and spacepos < minpos:
        # If space delimiter occurs earlier than ":" or "=" then it is the
        # delimiter only if there are non-whitespace characters between it and
        # the other detected delimiter.
        if line[spacepos:minpos].strip():
            return (u" ", spacepos)
    return (mindelimiter, minpos)
184
def find_delimeter(line):
    """Misspelt, deprecated entry point kept in case someone relies on it.

    Emits a DeprecationWarning and then looks for a delimiter using the Java
    dialect's delimiters.

    @param line: A properties line
    @return: delimiter character and offset within L{line}
    @rtype: Tuple (delimiter char, Offset Integer)
    """
    # stacklevel=2 attributes the warning to the code calling this function
    # rather than to this shim, which is what the reader needs to fix.
    warnings.warn("deprecated use Dialect.find_delimiter instead",
                  DeprecationWarning, stacklevel=2)
    return _find_delimiter(line, DialectJava.delimiters)
192
@accepts(unicode)
@returns(bool)
def is_line_continuation(line):
    """Determine whether L{line} has a line continuation marker.

    A .properties line that ends in a backslash (\\) continues on the next
    line.  Only an odd number of trailing backslashes counts: an even number
    is a run of escaped backslashes, not a continuation.

    @param line: A properties line
    @type line: unicode
    @return: Does L{line} end with a line continuation
    @rtype: Boolean
    """
    # Whatever rstrip removes is exactly the run of trailing backslashes.
    trailing_slashes = len(line) - len(line.rstrip("\\"))
    # Odd is a line continuation, even is not
    return trailing_slashes % 2 == 1
219
@accepts(unicode)
@returns(unicode)
def _key_strip(key):
    """Clean up the whitespace found around a key.

    @param key: A properties key
    @type key: unicode
    @return: Key without any unneeded whitespace
    @rtype: unicode
    """
    stripped = key.rstrip()
    if stripped.endswith("\\"):
        # The key ends in a backslash, so the first character that was
        # stripped was escaped: put that one character back.
        restore_at = len(stripped)
        stripped = stripped + key[restore_at:restore_at + 1]
    return stripped.lstrip()
# Registered dialect classes, keyed by their ``name`` attribute.
dialects = {}
# Dialect name that get_dialect() looks up when called without an argument.
default_dialect = "java"


def register_dialect(dialect):
    """Make L{dialect} available through L{get_dialect} under its name.

    @param dialect: Object with a C{name} attribute, used as the lookup key
    """
    key = dialect.name
    dialects[key] = dialect


def get_dialect(dialect=default_dialect):
    """Look up a registered dialect by name.

    @param dialect: Name the dialect was registered under
    @return: The registered dialect, or None if the name is unknown
    """
    if dialect in dialects:
        return dialects[dialect]
    return None
247
class Dialect(object):
    """Settings for the various behaviours in key=value files."""
    # Key under which register_dialect() files the dialect.
    name = None
    # Encoding used when the store is not given one explicitly.
    default_encoding = 'iso-8859-1'
    # List of strings accepted as key/value delimiters.
    delimiters = None
    pair_terminator = u""
    key_wrap_char = u""
    value_wrap_char = u""
    # Comment lines that the parser discards instead of keeping.
    drop_comments = []

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode the string for output in this dialect.

        A false value (None or empty) is treated as the empty string.  Text
        is returned as is for utf-8; for any other encoding it is passed
        through quote.javapropertiesencode.
        """
        #FIXME: dialects are a bad idea, not possible for subclasses to override key methods
        text = string or u""
        if encoding == "utf-8":
            return text
        return quote.javapropertiesencode(text)

    @classmethod
    def find_delimiter(cls, line):
        """Find the delimiter of L{line} among this dialect's delimiters."""
        return _find_delimiter(line, cls.delimiters)

    @classmethod
    def key_strip(cls, key):
        """Strip unneeded characters from the key"""
        return _key_strip(key)

    @classmethod
    def value_strip(cls, value):
        """Strip unneeded characters (leading whitespace) from the value"""
        return value.lstrip()
281
class DialectJava(Dialect):
    """Java .properties dialect: ISO-8859-1 by default, with "=", ":" or a
    space accepted as the delimiter between key and value."""
    name = "java"
    default_encoding = "iso-8859-1"
    delimiters = [u"=", u":", u" "]
# Make the dialect available through get_dialect("java").
register_dialect(DialectJava)
class DialectFlex(DialectJava):
    """Adobe Flex dialect: the Java dialect with UTF-8 as default encoding."""
    name = "flex"
    default_encoding = "utf-8"
# Make the dialect available through get_dialect("flex").
register_dialect(DialectFlex)
class DialectMozilla(Dialect):
    """Mozilla .properties dialect: UTF-8 by default, "=" as only delimiter."""
    name = "mozilla"
    default_encoding = "utf-8"
    delimiters = [u"="]

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode the string with quote.mozillapropertiesencode.

        A false value (None or empty) is encoded as the empty string; the
        encoding argument is not consulted.
        """
        text = string or u""
        return quote.mozillapropertiesencode(text)


register_dialect(DialectMozilla)
class DialectSkype(Dialect):
    """Skype .lang dialect: UTF-16 by default, "=" as only delimiter."""
    name = "skype"
    default_encoding = "utf-16"
    delimiters = [u"="]

    @classmethod
    def encode(cls, string, encoding=None):
        """Encode the string with quote.mozillapropertiesencode.

        A false value (None or empty) is encoded as the empty string; the
        encoding argument is not consulted.
        """
        text = string or u""
        return quote.mozillapropertiesencode(text)


register_dialect(DialectSkype)
class DialectStrings(Dialect):
    """Mac OS X .strings dialect: C{"key" = "value";} pairs, UTF-16 by
    default."""
    name = "strings"
    default_encoding = "utf-16"
    delimiters = [u"="]
    pair_terminator = u";"
    key_wrap_char = u'"'
    value_wrap_char = u'"'
    # Placeholder comment that is discarded when parsing.
    drop_comments = ["/* No comment provided by engineer. */"]

    @classmethod
    def key_strip(cls, key):
        """Strip unneeded characters (whitespace and wrapping quotes) from
        the key"""
        newkey = key.rstrip().rstrip('"')
        # If line now end in \ we put back the char that was escaped
        if newkey[-1:] == "\\":
            newkey += key[len(newkey):len(newkey)+1]
        return newkey.lstrip().lstrip('"')

    @classmethod
    def value_strip(cls, value):
        """Strip unneeded characters (whitespace, the terminating ";" and
        wrapping quotes) from the value"""
        newvalue = value.rstrip().rstrip(';').rstrip('"')
        # If line now end in \ we put back the char that was escaped
        if newvalue[-1:] == "\\":
            newvalue += value[len(newvalue):len(newvalue)+1]
        return newvalue.lstrip().lstrip('"')

    @classmethod
    def encode(cls, string, encoding=None):
        """Escape double quotes, newlines and tabs in the string.

        A false value (None or empty) is encoded as the empty string, as the
        other dialects do; the encoding argument is not consulted.
        """
        # Without this guard a None value raised AttributeError on .replace.
        string = string or u""
        return string.replace('"', '\\"').replace("\n", r"\n").replace("\t", r"\t")


register_dialect(DialectStrings)
class propunit(base.TranslationUnit):
    """An element of a properties file, i.e. a name and value, and any
    comments associated with them."""

    def __init__(self, source="", personality="java"):
        """Construct a blank propunit.

        @param source: Initial source text of the unit
        @param personality: Name of the dialect used to encode values
        """
        # NOTE(review): the dialect is set before the base constructor runs,
        # presumably because that constructor assigns source — confirm.
        self.personality = get_dialect(personality)
        super(propunit, self).__init__(source)
        self.name = u""
        self.value = u""
        self.translation = u""
        self.delimiter = u"="
        self.comments = []
        self.source = source

    def setsource(self, source):
        """Store source, encoded for the dialect, as the unit's value."""
        self._rich_source = None
        source = data.forceunicode(source)
        self.value = self.personality.encode(source or u"", self.encoding)

    def getsource(self):
        """Return the unit's value decoded with quote.propertiesdecode."""
        return quote.propertiesdecode(self.value)

    source = property(getsource, setsource)

    def settarget(self, target):
        """Store target, encoded for the dialect, as the unit's translation."""
        self._rich_target = None
        target = data.forceunicode(target)
        self.translation = self.personality.encode(target or u"", self.encoding)

    def gettarget(self):
        """Return the decoded translation with escaped spaces unescaped."""
        translation = quote.propertiesdecode(self.translation)
        # Turn every backslash-space pair back into a plain space.
        return re.sub(u"\\\\ ", u" ", translation)

    target = property(gettarget, settarget)

    def _get_encoding(self):
        """Return the owning store's encoding, or the dialect default when
        the unit has no store."""
        if self._store:
            return self._store.encoding
        return self.personality.default_encoding
    encoding = property(_get_encoding)

    def __str__(self):
        """Convert to a byte string in the unit's encoding."""
        source = self.getoutput()
        assert isinstance(source, unicode)
        return source.encode(self.encoding)

    def getoutput(self):
        """Convert the element back into formatted lines for a .properties
        file.

        @return: The comments followed by the key, delimiter and value
        (the translation when there is one); blank units give only their
        comments and an empty line
        @rtype: unicode
        """
        notes = self.getnotes()
        if notes:
            notes += u"\n"
        if self.isblank():
            return notes + u"\n"
        self.value = self.personality.encode(self.source, self.encoding)
        self.translation = self.personality.encode(self.target, self.encoding)
        value = self.translation or self.value
        delimiter = self.delimiter
        if delimiter is None:
            # A line parsed without any delimiter leaves None here, which %s
            # would render as the text "None".  Emit nothing for a bare key
            # and fall back to the constructor's default "=" once there is a
            # value to separate from the key.
            if value:
                delimiter = u"="
            else:
                delimiter = u""
        return u"%(notes)s%(key)s%(del)s%(value)s\n" % {"notes": notes,
                                                       "key": self.name,
                                                       "del": delimiter,
                                                       "value": value}

    def getlocations(self):
        """Return the unit's name as its only location."""
        return [self.name]

    def addnote(self, text, origin=None, position="append"):
        """Add a note to the unit.

        Developer-type notes (and notes without an origin) are appended to
        the unit's comments; position is not consulted for them.  Any other
        origin is handed to the base class.
        """
        if origin in ['programmer', 'developer', 'source code', None]:
            text = data.forceunicode(text)
            self.comments.append(text)
        else:
            return super(propunit, self).addnote(text, origin=origin,
                                                 position=position)

    def getnotes(self, origin=None):
        """Return the comments joined by newlines for developer-type origins
        (or no origin); other origins are handed to the base class."""
        if origin in ['programmer', 'developer', 'source code', None]:
            return u'\n'.join(self.comments)
        else:
            return super(propunit, self).getnotes(origin)

    def removenotes(self):
        """Discard all comments held by the unit."""
        self.comments = []

    def isblank(self):
        """Return whether this is a blank element, i.e. one with neither a
        name nor a value (it may still hold comments)."""
        return not (self.name or self.value)

    def istranslatable(self):
        """Return whether the unit has a name."""
        return bool(self.name)

    def getid(self):
        """Return the unit's name, which serves as its id."""
        return self.name

    def setid(self, value):
        """Set the unit's name, which serves as its id."""
        self.name = value
453
class propfile(base.TranslationStore):
    """This class represents a .properties file, made up of propunits."""
    UnitClass = propunit

    def __init__(self, inputfile=None, personality="java", encoding=None):
        """Construct a propfile, optionally reading in from inputfile.

        @param inputfile: Open file-like object to read and close, or None
        @param personality: Name of the dialect to parse and encode with
        @param encoding: Encoding to use instead of the dialect's default
        """
        super(propfile, self).__init__(unitclass=self.UnitClass)
        self.personality = get_dialect(personality)
        self.encoding = encoding or self.personality.default_encoding
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # Close the file even if reading it fails.
            try:
                propsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(propsrc)

    def parse(self, propsrc):
        """Read the source of a properties file in and include them as units.

        The store's encoding is replaced by the one detected for propsrc.

        @param propsrc: Content of the properties file
        """
        text, encoding = self.detect_encoding(
            propsrc,
            default_encodings=[self.personality.default_encoding,
                               'utf-8', 'utf-16'])
        self.encoding = encoding
        propsrc = text

        newunit = propunit("", self.personality.name)
        inmultilinevalue = False

        for line in propsrc.split(u"\n"):
            line = quote.rstripeol(line)
            stripped = line.strip()
            if inmultilinevalue:
                # handle multiline value if we're in one
                newunit.value += line.lstrip()
                # see if there's more
                inmultilinevalue = is_line_continuation(newunit.value)
                if inmultilinevalue:
                    # still waiting for more: strip the backslash
                    newunit.value = newunit.value[:-1]
                else:
                    # we're finished, add it to the list...
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
            # otherwise, this could be a comment
            # FIXME handle /* */ in a more reliable way
            # FIXME handle // inline comments
            elif (stripped[:1] in (u'#', u'!')
                  or stripped[:2] in (u"/*", u"//")
                  # The last two characters decide whether the line closes a
                  # /* */ comment ([:-2] compared everything *but* them).
                  or stripped[-2:] == u"*/"):
                # add a comment
                if line not in self.personality.drop_comments:
                    newunit.comments.append(line)
            elif not stripped:
                # this is a blank line: it ends a unit that has content
                if str(newunit).strip():
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
            else:
                newunit.delimiter, delimiter_pos = self.personality.find_delimiter(line)
                if delimiter_pos == -1:
                    # no delimiter: the whole line is a key with no value
                    newunit.name = self.personality.key_strip(line)
                    newunit.value = u""
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
                else:
                    newunit.name = self.personality.key_strip(line[:delimiter_pos])
                    rawvalue = line[delimiter_pos+1:]
                    if is_line_continuation(rawvalue.lstrip()):
                        # the value carries on: keep it without the backslash
                        inmultilinevalue = True
                        newunit.value = rawvalue.lstrip()[:-1]
                    else:
                        newunit.value = self.personality.value_strip(rawvalue)
                        self.addunit(newunit)
                        newunit = propunit("", self.personality.name)
        # see if there is a leftover one...
        if inmultilinevalue or len(newunit.comments) > 0:
            self.addunit(newunit)

    def __str__(self):
        """Convert the units back to lines."""
        return "".join([str(unit) for unit in self.units])
532
class javafile(propfile):
    """A propfile that always uses the "java" dialect and the "auto"
    encoding."""
    Name = _("Java Properties")
    Extensions = ['properties']
    # Original, misspelt attribute name; kept so existing references to it
    # keep working.
    Exensions = Extensions

    def __init__(self, *args, **kwargs):
        """Construct a propfile, forcing the personality and encoding."""
        kwargs['personality'] = "java"
        kwargs['encoding'] = "auto"
        super(javafile, self).__init__(*args, **kwargs)
542
class stringsfile(propfile):
    """A propfile that always uses the "strings" dialect."""
    Name = _("OS X Strings")
    Extensions = ['strings']

    def __init__(self, *args, **kwargs):
        """Construct a propfile, forcing the personality to "strings"."""
        kwargs.update(personality="strings")
        super(stringsfile, self).__init__(*args, **kwargs)
551