Package translate :: Package storage :: Module txt
[hide private]
[frames | no frames]

Source Code for Module translate.storage.txt

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module implements the functionality for handling plain text files, or 
 23  similar wiki type files. 
 24   
 25  Supported formats are 
 26    - Plain text 
 27    - dokuwiki 
 28    - MediaWiki 
 29  """ 
 30   
 31  import re 
 32   
 33  from translate.storage import base 
 34   
 35  dokuwiki = [] 
 36  dokuwiki.append(("Dokuwiki heading", re.compile(r"( ?={2,6}[\s]*)(.+)"), re.compile("([\s]*={2,6}[\s]*)$"))) 
 37  dokuwiki.append(("Dokuwiki bullet", re.compile(r"([\s]{2,}\*[\s]*)(.+)"), re.compile("[\s]+$"))) 
 38  dokuwiki.append(("Dokuwiki numbered item", re.compile(r"([\s]{2,}-[\s]*)(.+)"), re.compile("[\s]+$"))) 
 39   
 40  mediawiki = [] 
 41  mediawiki.append(("MediaWiki heading", re.compile(r"(={1,5}[\s]*)(.+)"), re.compile("([\s]*={1,5}[\s]*)$"))) 
 42  mediawiki.append(("MediaWiki bullet", re.compile(r"(\*+[\s]*)(.+)"), re.compile("[\s]+$"))) 
 43  mediawiki.append(("MediaWiki numbered item", re.compile(r"(#+[\s]*)(.+)"), re.compile("[\s]+$"))) 
 44   
 45  flavours = { 
 46  "dokuwiki": dokuwiki, 
 47  "mediawiki": mediawiki, 
 48  None: [], 
 49  "plain": [], 
 50  } 
 51   
 52   
class TxtUnit(base.TranslationUnit):
    """A single block of text taken from a text file.

    ``pretext`` and ``posttext`` hold the text that is written immediately
    before and after the source when the unit is converted to a string.
    The code relies on the Python 2 ``str``/``unicode`` types.
    """

    def __init__(self, source="", encoding="utf-8"):
        """Create a unit holding ``source``.

        ``encoding`` is used to decode byte-string sources and to encode the
        unit again in ``__str__``.
        """
        # The source setter reads self.encoding, so it has to exist before
        # the source is assigned.
        self.encoding = encoding
        super(TxtUnit, self).__init__(source)
        self.source = source
        self.pretext = ""
        self.posttext = ""
        self.location = []

    def __str__(self):
        """Return pretext + source + posttext, encoded with the unit encoding."""
        text = u"".join((self.pretext, self.source, self.posttext))
        if not isinstance(text, unicode):
            return text
        return text.encode(self.encoding)

    # Source and target are the same thing for a monolingual unit: the
    # target accessors below simply delegate to the source.
    def setsource(self, source):
        """Store ``source``, decoding byte strings with the unit encoding."""
        if isinstance(source, str):
            source = source.decode(self.encoding)
        self._rich_source = None
        self._source = source

    def getsource(self):
        """Return the stored source text."""
        return self._source

    source = property(getsource, setsource)

    def settarget(self, target):
        """Store ``target`` as the unit's source."""
        self._rich_target = None
        self.source = target

    def gettarget(self):
        """Return the unit's source, which doubles as its target."""
        return self.source

    target = property(gettarget, settarget)

    def addlocation(self, location):
        """Append ``location`` to the unit's list of locations."""
        self.location.append(location)

    def getlocations(self):
        """Return the list of locations recorded for this unit."""
        return self.location
100 101
class TxtFile(base.TranslationStore):
    """A text file represented as a store of text-block units.

    The input is cut into blocks at blank lines.  When a wiki flavour is
    selected, lines matching one of its markup rules (headings, bullets,
    numbered items) each become a unit of their own, with the markup kept in
    the unit's ``pretext``/``posttext``.

    The code relies on the Python 2 ``str``/``unicode`` types.
    """
    UnitClass = TxtUnit

    def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
        """Create the store and, if ``inputfile`` is given, parse it.

        :param inputfile: object with ``readlines()`` (and optionally a
            ``name`` attribute used in unit locations), or None
        :param flavour: key into ``flavours``; unknown keys mean no markup
            rules
        :param encoding: encoding used to decode the input and to encode
            the output
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        self.flavour = flavours.get(flavour, [])
        # Must be set before parsing: parse() decodes the text with it.
        self.encoding = encoding
        if inputfile is not None:
            txtsrc = inputfile.readlines()
            self.parse(txtsrc)

    def parse(self, lines):
        """Read in text lines and create units from the blocks of text.

        :param lines: a list of lines, or a single string that is split on
            ``"\\n"``
        """
        block = []
        startline = 0
        if not isinstance(lines, list):
            lines = lines.split("\n")
        for linenum, rawline in enumerate(lines):
            line = rawline.rstrip("\n").rstrip("\r")
            markup = self._matchrule(line)
            if markup is not None:
                # A markup line ends any paragraph collected so far; emit
                # that paragraph first so it does not absorb the markup line
                # or inherit its pretext/posttext.
                if block:
                    self._flushblock(block, startline)
                    block = []
                pretext, source, posttext = markup
                # The markup line is a unit of its own, located at its own
                # line number.
                self._flushblock([source], linenum, pretext, posttext)
            elif line.strip():
                if not block:
                    startline = linenum
                block.append(line)
            elif block:
                # Blank line: close the current paragraph.
                self._flushblock(block, startline)
                block = []
        if block:
            self._flushblock(block, startline)

    def _matchrule(self, line):
        """Return (pretext, source, posttext) for the first flavour rule that
        matches ``line``, or None if no rule matches."""
        for _rule, prere, postre in self.flavour:
            match = prere.match(line)
            if not match:
                continue
            pretext, source = match.groups()
            posttext = ""
            postmatch = postre.search(source)
            if postmatch:
                posttext = postmatch.group()
                source = source[:postmatch.start()]
            return pretext, source, posttext
        return None

    def _flushblock(self, block, startline, pretext="", posttext=""):
        """Add one unit made of the lines in ``block``; ``startline`` is the
        0-based index of the block's first line."""
        # Fall back to UTF-8 if no usable encoding was configured.
        encoding = getattr(self, "encoding", None) or "utf-8"
        text = "\n".join(block)
        # Decode here with the store's encoding; otherwise the unit would
        # decode the bytes with its own default encoding.
        if isinstance(text, str):
            text = text.decode(encoding)
        unit = self.addsourceunit(text)
        # Let the unit encode itself the same way the file was decoded.
        unit.encoding = encoding
        unit.addlocation("%s:%d" % (self.filename, startline + 1))
        unit.pretext = pretext
        unit.posttext = posttext
        return unit

    def __str__(self):
        """Return the store's output, encoded if it is a unicode string."""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Convert the units back to blocks separated by an empty line."""
        blocks = [str(unit) for unit in self.units]
        return "\n\n".join(blocks)
165