Package translate :: Package storage :: Module qm
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.qm

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007-2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """Module for parsing Qt .qm files 
 24   
 25  @note: based on documentation from Gettext's .qm implementation 
 26  (see write-qt.c) and on observation of the output of lrelease. 
 27  @note: Certain deprecated section tags are not implemented.  These will break 
 28  and print out the missing tag.  They are easy to implement and should follow 
 29  the structure in 03 (Translation).  We could find no examples that use these 
 30  so we'd rather leave it unimplemented until we actually have test data. 
 31  @note: Many .qm files are unable to be parsed as they do not have the source 
 32  text.  We assume that since they use a hash table to lookup the data there is 
 33  actually no need for the source text.  It seems however that in Qt4's lrelease 
 34  all data is included in the resultant .qm file. 
 35  @todo: We can only parse, not create, a .qm file.  The main issue is that we 
 36  need to implement the hashing algorithm (which seems to be identical to the 
 37  Gettext hash algorithm).  Unlike Gettext it seems that the hash is required, 
 38  but that has not been validated. 
 39  @todo: The code can parse files correctly.  But it could be cleaned up to be 
 40  more readable, especially the part that breaks the file into sections. 
 41   
 42  U{http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/qm.cpp} 
 43  U{Plural information<http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/numerus.cpp>} 
 44  U{QLocale languages<http://docs.huihoo.com/qt/4.5/qlocale.html#Language-enum>} 
 45  """ 
 46   
 47  import codecs 
 48  import struct 
 49  import sys 
 50   
 51  from translate.misc.multistring import multistring 
 52  from translate.storage import base 
 53   
 54  QM_MAGIC_NUMBER = (0x3CB86418L, 0xCAEF9C95L, 0xCD211CBFL, 0x60A1BDDDL) 
 55   
 56   
def qmunpack(file_='messages.qm'):
    """Helper to unpack Qt .qm files into a Python string

    Reads the whole file and writes its content to standard output as a
    single line of C{\\xNN} escapes (one per byte, lowercase hex), followed
    by a newline.  An empty file produces an empty line.

    @param file_: path of the .qm file to dump
    """
    # .qm is a binary format, so read raw bytes: text mode may translate
    # line endings or stop early on some platforms.  The with-block also
    # guarantees the handle is closed if read() fails.
    with open(file_, 'rb') as f:
        s = f.read()
    # Iterating a bytearray yields integers on both Python 2 and Python 3,
    # which avoids the str-only ord() call.
    escaped = "".join(["\\x%02x" % byte for byte in bytearray(s)])
    sys.stdout.write(escaped + "\n")
class qmunit(base.TranslationUnit):
    """A single translation message read from a .qm file.

    The class adds nothing to L{base.TranslationUnit}; it exists so that
    .qm stores have their own unit type.
    """

    def __init__(self, source=None):
        """Create the unit, handing C{source} straight to the base class.

        @param source: source text of the message, or None
        """
        super(qmunit, self).__init__(source)
class qmfile(base.TranslationStore):
    """A class representing a .qm file.

    Only reading is supported: L{__str__} and L{savefile} always raise.
    Parsing walks the sections that follow the 16 byte magic number and
    turns the records of the Messages section into units.
    """
    UnitClass = qmunit
    Name = _("Qt .qm file")
    Mimetypes = ["application/x-qm"]
    Extensions = ["qm"]
    _binary = True

    # Size of the magic number that opens the file.
    _MAGIC_LENGTH = 16
    # Every section starts with a 1 byte tag and a 4 byte big-endian length.
    _SECTION_HEADER_LENGTH = 5
    # Tag of the only section whose content is used.
    _MESSAGES_SECTION = 0x69
    # Section tags that are recognised: Hash, Messages, Contexts and
    # NumerusRules.  Anything else is reported as unknown.
    _KNOWN_SECTIONS = (0x42, 0x69, 0x2f, 0x88)
    # Message record tags that are followed by a length-prefixed payload:
    # Translation, SourceText, Context and Disambiguating-comment.
    _LENGTH_PREFIXED_TAGS = (0x03, 0x06, 0x07, 0x08)

    def __init__(self, inputfile=None, unitclass=qmunit):
        """Create the store and, if given, parse C{inputfile} into it.

        @param inputfile: file object or raw .qm data to parse, or None for
            an empty store
        @param unitclass: class used for the units of this store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.units = []
        self.filename = ''
        if inputfile is not None:
            # parse() fills this very instance and accepts both file objects
            # and raw data.
            # NOTE(review): this used to call self.parsestring(); in
            # translate.storage.base that appears to be a classmethod that
            # builds a separate store, which would leave this one empty --
            # confirm against base.TranslationStore.
            self.parse(inputfile)

    def __str__(self):
        """Output a string representation of the .qm data file

        @raise NotImplementedError: always, writing is not supported
        """
        raise NotImplementedError("Writing of .qm files is not supported yet")

    def parse(self, input):
        """Parse the given file object or raw .qm data into units.

        A file object is read completely and then closed.  Files without a
        Messages section leave the store without new units.  Parsing stops
        quietly (after a note on standard error) at the first message record
        whose tag is not implemented; units found before it are kept.

        @param input: object with a C{read} method, or the raw file content
        @raise ValueError: if the data is too short, does not start with the
            .qm magic number, is truncated, or a message ends without both a
            source text and a translation
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            qmsrc = input.read()
            input.close()
            input = qmsrc
        if len(input) < self._MAGIC_LENGTH:
            raise ValueError("This is not a .qm file: file empty or too small")
        magic = struct.unpack(">4L", input[:self._MAGIC_LENGTH])
        if magic != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file: invalid magic number")
        bounds = self._find_messages(input)
        if bounds is None:
            # No Messages section at all: nothing to add.
            return
        self._parse_messages(input, bounds[0], bounds[1])

    def _find_messages(self, data):
        """Walk the section table and locate the Messages section.

        @param data: complete raw content of the .qm file
        @return: C{(start, end)} offsets of the Messages payload, or None if
            the file has no such section; the last one wins if there are
            several
        @raise ValueError: if a section header or the Messages payload runs
            past the end of the data
        """
        size = len(data)
        header = self._SECTION_HEADER_LENGTH
        found = None
        offset = self._MAGIC_LENGTH
        while offset < size:
            if offset + header > size:
                raise ValueError("Corrupt .qm file: truncated section header")
            section_type, length = struct.unpack(
                ">BL", data[offset:offset + header])
            payload_start = offset + header
            payload_end = payload_start + length
            if section_type == self._MESSAGES_SECTION:
                if payload_end > size:
                    raise ValueError(
                        "Corrupt .qm file: truncated messages section")
                found = (payload_start, payload_end)
            elif section_type not in self._KNOWN_SECTIONS:
                # Diagnostics go to standard error so they cannot end up in
                # data a caller writes to standard output.
                sys.stderr.write(
                    "Section: %s (type: %#x, offset: %#x, length: %d)\n"
                    % ("Unkown", section_type, offset, length))
            offset = payload_end
        return found

    def _read_field(self, data, pos, end):
        """Read one length-prefixed payload of a message record.

        @param data: complete raw content of the .qm file
        @param pos: offset of the 4 byte signed big-endian length
        @param end: offset just past the Messages section
        @return: C{(payload, newpos)}; C{payload} is None when the length is
            the null marker -1
        @raise ValueError: if the field does not fit in the section
        """
        if pos + 4 > end:
            raise ValueError("Corrupt .qm file: truncated message field")
        length, = struct.unpack(">l", data[pos:pos + 4])
        pos += 4
        if length == -1:
            # Null value: only the length itself is stored.
            return None, pos
        if length < 0 or pos + length > end:
            raise ValueError("Corrupt .qm file: truncated message field")
        return data[pos:pos + length], pos + length

    def _parse_messages(self, data, start, end):
        """Turn the records between C{start} and C{end} into units.

        @param data: complete raw content of the .qm file
        @param start: offset of the first record of the Messages section
        @param end: offset just past the Messages section
        @raise ValueError: if a message ends without both a source text and
            a translation, or a field is truncated
        """
        pos = start
        source = target = None
        while pos < end:
            subsection, = struct.unpack(">B", data[pos:pos + 1])
            pos += 1
            if subsection == 0x01:  # End of one message
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
            elif subsection == 0x05:  # Hash: four bytes, no length prefix
                if pos + 4 > end:
                    raise ValueError(
                        "Corrupt .qm file: truncated message field")
                pos += 4
            elif subsection in self._LENGTH_PREFIXED_TAGS:
                payload, pos = self._read_field(data, pos, end)
                if subsection == 0x03:  # Translation (UTF-16, big-endian)
                    if payload is None:
                        target = u""
                    else:
                        string = codecs.utf_16_be_decode(payload)[0]
                        # Test the type rather than the truth value: an empty
                        # first form is falsy but must still be kept when
                        # further plural forms follow.
                        if isinstance(target, multistring):
                            target.strings.append(string)
                        else:
                            target = multistring(string)
                elif subsection == 0x06:  # SourceText
                    if payload is None:
                        source = u""
                    else:
                        source = payload.decode('iso-8859-1')
                # Context (0x07) and Disambiguating-comment (0x08) are read
                # past but not stored on the unit.
            else:
                if subsection == 0x02:
                    subsection_name = "SourceText16"
                elif subsection == 0x04:
                    subsection_name = "Context16"
                else:
                    subsection_name = "Unkown"
                sys.stderr.write("Unimplemented: %s %s\n"
                                 % (subsection, subsection_name))
                return

    def savefile(self, storefile):
        """Refuse to save, as writing .qm files is not implemented.

        @param storefile: ignored
        @raise NotImplementedError: always
        """
        raise NotImplementedError("Writing of .qm files is not supported yet")