1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Base classes for storage interfaces.
22
23 @organization: Zuza Software Foundation
24 @copyright: 2006-2009 Zuza Software Foundation
25 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
26 """
27
28 import logging
29 try:
30 import cPickle as pickle
31 except ImportError:
32 import pickle
33 from exceptions import NotImplementedError
34
35 import translate.i18n
36 from translate.misc.multistring import multistring
37 from translate.misc.typecheck import accepts, Self, IsOneOf
38 from translate.storage.placeables import StringElem, general, parse as rich_parse
39 from translate.storage.workflow import StateEnum as states
40
41
43 """Forces derived classes to override method."""
44
45 if type(method.im_self) == type(baseclass):
46
47 actualclass = method.im_self
48 else:
49 actualclass = method.im_class
50 if actualclass != baseclass:
51 raise NotImplementedError(
52 "%s does not reimplement %s as required by %s" % \
53 (actualclass.__name__, method.__name__, baseclass.__name__))
54
55
57
59 self.inner_exc = inner_exc
60
62 return repr(self.inner_exc)
63
64
66 """Base class for translation units.
67
68 Our concept of a I{translation unit} is influenced heavily by XLIFF:
69 U{http://www.oasis-open.org/committees/xliff/documents/xliff-specification.htm}
70
71 As such most of the method- and variable names borrows from XLIFF
72 terminology.
73
74 A translation unit consists of the following:
75 - A I{source} string. This is the original translatable text.
76 - A I{target} string. This is the translation of the I{source}.
77 - Zero or more I{notes} on the unit. Notes would typically be some
78 comments from a translator on the unit, or some comments originating
79 from the source code.
80 - Zero or more I{locations}. Locations indicate where in the original
81 source code this unit came from.
82 - Zero or more I{errors}. Some tools (eg. L{pofilter<filters.pofilter>})
83 can run checks on translations and produce error messages.
84
85 @group Source: *source*
86 @group Target: *target*
87 @group Notes: *note*
88 @group Locations: *location*
89 @group Errors: *error*
90 """
91
92 rich_parsers = []
93 """A list of functions to use for parsing a string into a rich string
94 tree."""
95
96
97 S_OBSOLETE = states.OBSOLETE
98 S_EMPTY = states.EMPTY
99 S_NEEDS_WORK = states.NEEDS_WORK
100 S_REJECTED = states.REJECTED
101 S_NEEDS_REVIEW = states.NEEDS_REVIEW
102 S_UNREVIEWED = states.UNREVIEWED
103 S_FINAL = states.FINAL
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127 STATE = {}
128
130 """Constructs a TranslationUnit containing the given source string."""
131 self.notes = ""
132 self._store = None
133 self.source = source
134 self._target = None
135 self._rich_source = None
136 self._rich_target = None
137 self._state_n = 0
138
140 """Compares two TranslationUnits.
141
142 @type other: L{TranslationUnit}
143 @param other: Another L{TranslationUnit}
144 @rtype: Boolean
145 @return: Returns True if the supplied TranslationUnit equals this unit.
146 """
147 return self.source == other.source and self.target == other.target
148
150 """Converts to a string representation that can be parsed back using
151 L{parsestring()}."""
152
153 store = getattr(self, "_store", None)
154 self._store = None
155 dump = pickle.dumps(self)
156 self._store = store
157 return dump
158
160 """Convert a "rich" string tree to a C{multistring}:
161
162 >>> from translate.storage.placeables.interfaces import X
163 >>> rich = [StringElem(['foo', X(id='xxx', sub=[' ']), 'bar'])]
164 >>> TranslationUnit.rich_to_multistring(rich)
165 multistring(u'foo bar')
166 """
167 return multistring([unicode(elem) for elem in elem_list])
168 rich_to_multistring = classmethod(rich_to_multistring)
169
171 """Convert a multistring to a list of "rich" string trees:
172
173 >>> target = multistring([u'foo', u'bar', u'baz'])
174 >>> TranslationUnit.multistring_to_rich(target)
175 [<StringElem([<StringElem([u'foo'])>])>,
176 <StringElem([<StringElem([u'bar'])>])>,
177 <StringElem([<StringElem([u'baz'])>])>]
178 """
179 if isinstance(mulstring, multistring):
180 return [rich_parse(s, self.rich_parsers) for s in mulstring.strings]
181 return [rich_parse(mulstring, self.rich_parsers)]
182
184 """Sets the source string to the given value."""
185 self._rich_source = None
186 self._source = source
187 source = property(lambda self: self._source, setsource)
188
190 """Sets the target string to the given value."""
191 self._rich_target = None
192 self._target = target
193 target = property(lambda self: self._target, settarget)
194
199
201 if not hasattr(value, '__iter__'):
202 raise ValueError('value must be iterable')
203 if len(value) < 1:
204 raise ValueError('value must have at least one element.')
205 if not isinstance(value[0], StringElem):
206 raise ValueError('value[0] must be of type StringElem.')
207 self._rich_source = list(value)
208 multi = self.rich_to_multistring(value)
209 if self.source != multi:
210 self.source = multi
211 rich_source = property(_get_rich_source, _set_rich_source)
212 """ @see: rich_to_multistring
213 @see: multistring_to_rich"""
214
219
221 if not hasattr(value, '__iter__'):
222 raise ValueError('value must be iterable')
223 if len(value) < 1:
224 raise ValueError('value must have at least one element.')
225 if not isinstance(value[0], StringElem):
226 raise ValueError('value[0] must be of type StringElem.')
227 self._rich_target = list(value)
228 self.target = self.rich_to_multistring(value)
229 rich_target = property(_get_rich_target, _set_rich_target)
230 """ @see: rich_to_multistring
231 @see: multistring_to_rich"""
232
234 """Returns the length of the target string.
235
236 @note: Plural forms might be combined.
237 @rtype: Integer
238 """
239 length = len(self.target or "")
240 strings = getattr(self.target, "strings", [])
241 if strings:
242 length += sum([len(pluralform) for pluralform in strings[1:]])
243 return length
244
246 """A unique identifier for this unit.
247
248 @rtype: string
249 @return: an identifier for this unit that is unique in the store
250
251 Derived classes should override this in a way that guarantees a unique
252 identifier for each unit in the store.
253 """
254 return self.source
255
257 """Sets the unique identifier for this unit.
258
259 Only implemented if the format allows ids independent of other
260 unit properties like source or context."""
261 pass
262
264 """A list of source code locations.
265
266 @note: Shouldn't be implemented if the format doesn't support it.
267 @rtype: List
268 """
269 return []
270
272 """Add one location to the list of locations.
273
274 @note: Shouldn't be implemented if the format doesn't support it.
275 """
276 pass
277
279 """Add a location or a list of locations.
280
281 @note: Most classes shouldn't need to implement this,
282 but should rather implement L{addlocation()}.
283 @warning: This method might be removed in future.
284 """
285 if isinstance(location, list):
286 for item in location:
287 self.addlocation(item)
288 else:
289 self.addlocation(location)
290
def getcontext(self):
    """Get the message context.

    This implementation has no notion of context and always returns an
    empty string.

    @rtype: string
    @return: An empty string.
    """
    return ""
294
def setcontext(self, context):
    """Set the message context.

    This implementation ignores C{context} and does nothing.

    @param context: The context value; not used here.
    """
    pass
298
300 """Returns all notes about this unit.
301
302 It will probably be freeform text or something reasonable that can be
303 synthesised by the format.
304 It should not include location comments (see L{getlocations()}).
305 """
306 return getattr(self, "notes", "")
307
def addnote(self, text, origin=None, position="append"):
    """Adds a note (comment).

    @type text: string
    @param text: Usually just a sentence or two.
    @type origin: string
    @param origin: Specifies who/where the comment comes from.
        Origin can be one of the following text strings:
          - 'translator'
          - 'developer', 'programmer', 'source code' (synonyms)
        This implementation does not use it.
    @type position: string
    @param position: With "append" (the default) and a non-empty existing
        C{notes} attribute, the text is added after a newline. In every
        other case C{notes} is replaced by C{text}.
    """
    # getattr() guards against instances that never had ``notes`` assigned.
    if position == "append" and getattr(self, "notes", None):
        self.notes += '\n' + text
    else:
        self.notes = text
323
325 """Remove all the translator's notes."""
326 self.notes = u''
327
def adderror(self, errorname, errortext):
    """Adds an error message to this unit.

    This implementation ignores both arguments and does nothing.

    @type errorname: string
    @param errorname: A single word to id the error.
    @type errortext: string
    @param errortext: The text describing the error.
    """
    pass
337
339 """Get all error messages.
340
341 @rtype: Dictionary
342 """
343 return {}
344
346 """Marks the unit to indicate whether it needs review.
347
348 @keyword needsreview: Defaults to True.
349 @keyword explanation: Adds an optional explanation as a note.
350 """
351 pass
352
354 """Indicates whether this unit is translated.
355
356 This should be used rather than deducing it from .target,
357 to ensure that other classes can implement more functionality
358 (as XLIFF does).
359 """
360 return bool(self.target) and not self.isfuzzy()
361
363 """Indicates whether this unit can be translated.
364
365 This should be used to distinguish real units for translation from
366 header, obsolete, binary or other blank units.
367 """
368 return bool(self.source)
369
371 """Indicates whether this unit is fuzzy."""
372 return False
373
375 """Marks the unit as fuzzy or not."""
376 pass
377
379 """indicate whether a unit is obsolete"""
380 return False
381
383 """Make a unit obsolete"""
384 pass
385
387 """Indicates whether this unit is a header."""
388 return False
389
391 """Indicates whether this unit needs review."""
392 return False
393
395 """Used to see if this unit has no source or target string.
396
397 @note: This is probably used more to find translatable units,
398 and we might want to move in that direction rather and get rid of this.
399 """
400 return not (self.source or self.target)
401
403 """Tells whether or not this specific unit has plural strings."""
404
405 return False
406
409
412
413 - def merge(self, otherunit, overwrite=False, comments=True,
414 authoritative=False):
418
420 """Iterator that only returns this unit."""
421 yield self
422
424 """This unit in a list."""
425 return [self]
426
428 """Build a native unit from a foreign unit, preserving as much
429 information as possible."""
430 if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
431 return unit.copy()
432 newunit = cls(unit.source)
433 newunit.target = unit.target
434 newunit.markfuzzy(unit.isfuzzy())
435 locations = unit.getlocations()
436 if locations:
437 newunit.addlocations(locations)
438 notes = unit.getnotes()
439 if notes:
440 newunit.addnote(notes)
441 return newunit
442 buildfromunit = classmethod(buildfromunit)
443
444 xid = property(lambda self: None, lambda self, value: None)
445 rid = property(lambda self: None, lambda self, value: None)
446
448 if n is None:
449 n = self.get_state_n()
450 for state_id, state_range in self.STATE.iteritems():
451 if state_range[0] <= n < state_range[1]:
452 return state_id
453 if self.STATE:
454 raise ValueError('No state containing value %s' % (n))
455 else:
456 return n
457
463
465 self._state_n = value
466
468 """Empty method that should be overridden in sub-classes to infer the
469 current state(_n) of the unit from its current state."""
470 pass
471
472
474 """Base class for stores for multiple translation units of type
475 UnitClass."""
476
477 UnitClass = TranslationUnit
478 """The class of units that will be instantiated and used by this class"""
479 Name = "Base translation store"
480 """The human usable name of this store type"""
481 Mimetypes = None
482 """A list of MIME types associated with this store type"""
483 Extensions = None
484 """A list of file extentions associated with this store type"""
485 _binary = False
486 """Indicates whether a file should be accessed as a binary file."""
487 suggestions_in_format = False
488 """Indicates if format can store suggestions and alternative translation
489 for a unit"""
490
499
501 """Gets the source language for this store"""
502 return self.sourcelanguage
503
507
509 """Gets the target language for this store"""
510 return self.targetlanguage
511
515
517 """Gets the project type for this store"""
518 return getattr(self, '_project_style', None)
519
521 """Sets the project type for this store"""
522 self._project_style = project_style
523
525 """Iterator over all the units in this store."""
526 for unit in self.units:
527 yield unit
528
530 """Return a list of all units in this store."""
531 return [unit for unit in self.unit_iter()]
532
534 """Appends the given unit to the object's list of units.
535
536 This method should always be used rather than trying to modify the
537 list manually.
538
539 @type unit: L{TranslationUnit}
540 @param unit: The unit that will be added.
541 """
542 unit._store = self
543 self.units.append(unit)
544
546 """Adds and returns a new unit with the given source string.
547
548 @rtype: L{TranslationUnit}
549 """
550 unit = self.UnitClass(source)
551 self.addunit(unit)
552 return unit
553
555 """find unit with matching id by checking id_index"""
556 self.require_index()
557 return self.id_index.get(id, None)
558
560 """Finds the unit with the given source string.
561
562 @rtype: L{TranslationUnit} or None
563 """
564 if len(getattr(self, "sourceindex", [])):
565 if source in self.sourceindex:
566 return self.sourceindex[source][0]
567 else:
568 for unit in self.units:
569 if unit.source == source:
570 return unit
571 return None
572
574 """Finds the units with the given source string.
575
576 @rtype: List of L{TranslationUnit} or None
577 """
578 if len(getattr(self, "sourceindex", [])):
579 if source in self.sourceindex:
580 return self.sourceindex[source]
581 else:
582
583
584 result = []
585 for unit in self.units:
586 if unit.source == source:
587 result.append(unit)
588 return result
589 return None
590
592 """Returns the translated string for a given source string.
593
594 @rtype: String or None
595 """
596 unit = self.findunit(source)
597 if unit and unit.target:
598 return unit.target
599 else:
600 return None
601
603 """Remove a unit from source and location indexes"""
604
605 def remove_unit(source):
606 if source in self.sourceindex:
607 try:
608 self.sourceindex[source].remove(unit)
609 if len(self.sourceindex[source]) == 0:
610 del(self.sourceindex[source])
611 except ValueError:
612 pass
613
614 if unit.hasplural():
615 for source in unit.source.strings:
616 remove_unit(source)
617 else:
618 remove_unit(unit.source)
619
620 for location in unit.getlocations():
621 if location in self.locationindex \
622 and self.locationindex[location] is not None \
623 and self.locationindex[location] == unit:
624 del(self.locationindex[location])
625
627 """Add a unit to source and location indexes"""
628 self.id_index[unit.getid()] = unit
629
630 def insert_unit(source):
631 if not source in self.sourceindex:
632 self.sourceindex[source] = [unit]
633 else:
634 self.sourceindex[source].append(unit)
635
636 if unit.hasplural():
637 for source in unit.source.strings:
638 insert_unit(source)
639 else:
640 insert_unit(unit.source)
641
642 for location in unit.getlocations():
643 if location in self.locationindex:
644
645
646 self.locationindex[location] = None
647 else:
648 self.locationindex[location] = unit
649
651 """Indexes the items in this store. At least .sourceindex should be
652 useful."""
653 self.locationindex = {}
654 self.sourceindex = {}
655 self.id_index = {}
656 for index, unit in enumerate(self.units):
657 unit.index = index
658 if not (unit.isheader() or unit.isblank()):
659 self.add_unit_to_index(unit)
660
662 """make sure source index exists"""
663 if not hasattr(self, "id_index"):
664 self.makeindex()
665
def getids(self, filename=None):
    """Return a list of unit ids.

    @param filename: Not used by this implementation.
    @rtype: List
    @return: The keys of C{id_index}, read after calling
        C{require_index()}.
    """
    self.require_index()
    # list() guarantees a real list even where dict.keys() is a view.
    return list(self.id_index)
670
672 odict = self.__dict__.copy()
673 odict['fileobj'] = None
674 return odict
675
677 self.__dict__.update(dict)
678 if getattr(self, "filename", False):
679 self.fileobj = open(self.filename)
680
682 """Converts to a string representation that can be parsed back using
683 L{parsestring()}."""
684
685 fileobj = getattr(self, "fileobj", None)
686 self.fileobj = None
687 dump = pickle.dumps(self)
688 self.fileobj = fileobj
689 return dump
690
692 """Returns True if the object doesn't contain any translation units."""
693 if len(self.units) == 0:
694 return True
695 for unit in self.units:
696 if unit.istranslatable():
697 return False
698 return True
699
701 """Tries to work out what the name of the filesystem file is and
702 assigns it to .filename."""
703 fileobj = getattr(self, "fileobj", None)
704 if fileobj:
705 filename = getattr(fileobj, "name",
706 getattr(fileobj, "filename", None))
707 if filename:
708 self.filename = filename
709
711 """Converts the string representation back to an object."""
712 newstore = cls()
713 if storestring:
714 newstore.parse(storestring)
715 return newstore
716 parsestring = classmethod(parsestring)
717
719 if not default_encodings:
720 default_encodings = ['utf-8']
721 try:
722 import chardet
723 detected_encoding = chardet.detect(text)
724 if detected_encoding['confidence'] < 0.48:
725 detected_encoding = None
726 if detected_encoding == 'ascii':
727 detected_encoding = 'utf-8'
728 except ImportError:
729 detected_encoding = None
730
731 encodings = []
732 if self.encoding == 'auto':
733 if detected_encoding and detected_encoding['encoding'] not in encodings:
734 encodings.append(detected_encoding['encoding'])
735 for encoding in default_encodings:
736 if encoding not in encodings:
737 encodings.append(encoding)
738 else:
739 encodings.append(self.encoding)
740 if detected_encoding and detected_encoding['encoding'] != self.encoding:
741 logging.warn("trying to parse %s with encoding: %s but detected encoding is %s",
742 self.filename, self.encoding, detected_encoding['encoding'])
743 encodings.append(self.encoding)
744
745 for encoding in encodings:
746 try:
747 r_text = unicode(text, encoding)
748 r_encoding = encoding
749 break
750 except UnicodeDecodeError:
751 r_text = None
752 r_encoding = None
753 if r_encoding == 'ascii':
754 r_encoding = 'utf-8'
755 return r_text, r_encoding
756
758 """parser to process the given source string"""
759 self.units = pickle.loads(data).units
760
762 """Writes the string representation to the given file (or filename)."""
763 storestring = str(self)
764 if isinstance(storefile, basestring):
765 mode = 'w'
766 if self._binary:
767 mode = 'wb'
768 storefile = open(storefile, mode)
769 self.fileobj = storefile
770 self._assignname()
771 storefile.write(storestring)
772 storefile.close()
773
775 """Save to the file that data was originally read from, if
776 available."""
777 fileobj = getattr(self, "fileobj", None)
778 mode = 'w'
779 if self._binary:
780 mode = 'wb'
781 if not fileobj:
782 filename = getattr(self, "filename", None)
783 if filename:
784 fileobj = file(filename, mode)
785 else:
786 fileobj.close()
787 filename = getattr(fileobj, "name",
788 getattr(fileobj, "filename", None))
789 if not filename:
790 raise ValueError("No file or filename to save to")
791 fileobj = fileobj.__class__(filename, mode)
792 self.savefile(fileobj)
793
795 """Reads the given file (or opens the given filename) and parses back
796 to an object."""
797 mode = 'r'
798 if cls._binary:
799 mode = 'rb'
800 if isinstance(storefile, basestring):
801 storefile = open(storefile, mode)
802 mode = getattr(storefile, "mode", mode)
803
804 if mode == 1 or "r" in mode:
805 storestring = storefile.read()
806 storefile.close()
807 else:
808 storestring = ""
809 newstore = cls.parsestring(storestring)
810 newstore.fileobj = storefile
811 newstore._assignname()
812 return newstore
813 parsefile = classmethod(parsefile)
814