1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import re
23
24 """
25 From the GNU gettext manual:
26 WHITE-SPACE
27 # TRANSLATOR-COMMENTS
28 #. AUTOMATIC-COMMENTS
29 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
30 #: REFERENCE...
31 #, FLAG...
32 msgctxt CONTEXT (Gettext 0.15)
33 msgid UNTRANSLATED-STRING
34 msgstr TRANSLATED-STRING
35 """
36
# Unbound str/list methods bound to module-level names so the parsing code
# in this module can call them as plain functions, e.g.
# startswith(line, '#~') or append(some_list, item).
isspace = str.isspace
find = str.find
rfind = str.rfind
startswith = str.startswith
append = list.append
# NOTE(review): str.decode exists on Python 2 only; on Python 3 this
# assignment raises AttributeError when the module is imported.
decode = str.decode
43
44
46
def __init__(self, input_iterator, UnitClass, encoding=None):
    """Set up the parser state around an iterator of input lines.

    Args:
        input_iterator: iterator that yields the lines to parse; it is
            stored as given and not consumed here directly.
        UnitClass: stored as ``self.UnitClass``; elsewhere in this module
            it is called with no arguments to create a fresh unit.
        encoding: optional encoding; stored unchanged as ``self.encoding``.
    """
    self._input_iterator = input_iterator
    self.encoding = encoding
    # Begin with an empty look-ahead line and the end-of-input flag cleared.
    self.next_line, self.eof = '', False
    # Presumably pulls the first line of input into ``next_line`` --
    # confirm against read_line(), which is not part of this block.
    self.read_line()
    self.UnitClass = UnitClass
54
60
62 current = self.next_line
63 if self.eof:
64 return current
65 try:
66 self.next_line = self._input_iterator.next()
67 while not self.eof and isspace(self.next_line):
68 self.next_line = self._input_iterator.next()
69 except StopIteration:
70 self.next_line = ''
71 self.eof = True
72 return current
73
76
77
79 """Read all the lines belonging starting with #|. These lines contain
80 the previous msgid and msgctxt info. We strip away the leading '#| '
81 and read until we stop seeing #|."""
82 prevmsgid_lines = []
83 next_line = parse_state.next_line
84 while startswith(next_line, '#| ') or startswith(next_line, '| '):
85 content = parse_state.read_line()
86 prefix_len = content.index('| ')
87 content = content[prefix_len+2:]
88 append(prevmsgid_lines, content)
89 next_line = parse_state.next_line
90 return prevmsgid_lines
91
92
94 parse_message(parse_state, 'msgctxt', 7, unit.prev_msgctxt)
95 return len(unit.prev_msgctxt) > 0
96
97
99 parse_message(parse_state, 'msgid', 5, unit.prev_msgid)
100 return len(unit.prev_msgid) > 0
101
102
104 parse_message(parse_state, 'msgid_plural', 12, unit.prev_msgid_plural)
105 return len(unit.prev_msgid_plural) > 0
106
107
139
140
148
149
151 """Read all the lines belonging to the current unit if obsolete."""
152 obsolete_lines = []
153 next_line = parse_state.next_line
154 while startswith(next_line, '#~'):
155 content = parse_state.read_line()[2:].lstrip()
156 append(obsolete_lines, content)
157 next_line = parse_state.next_line
158 if startswith(content, 'msgstr'):
159
160
161 while startswith(next_line, '#~ "') or startswith(next_line, '#~ msgstr'):
162 content = parse_state.read_line()[3:]
163 append(obsolete_lines, content)
164 next_line = parse_state.next_line
165 break
166 return obsolete_lines
167
168
177
178
180 line = parse_state.next_line
181 left = find(line, '"', start_pos)
182 if left == start_pos or isspace(line[start_pos:left]):
183 right = rfind(line, '"')
184 if left != right:
185 return parse_state.read_line()[left:right+1]
186 else:
187
188
189 return parse_state.read_line()[left:-1] + '"'
190 return None
191
192
200
201
203 string = parse_quoted(parse_state, first_start_pos)
204 while string is not None:
205 if not startswith(string, '"_:'):
206 append(msg_list, parse_state.decode(string))
207 string = parse_quoted(parse_state)
208 else:
209 string = parse_msg_comment(parse_state, msg_comment_list, string)
210
211
def parse_message(parse_state, start_of_string, start_of_string_len, msg_list, msg_comment_list=None):
    """Parse one keyword entry if the upcoming line begins with that keyword.

    Args:
        parse_state: parser state; only ``next_line`` is inspected here, the
            object is then handed on to ``parse_multiple_quoted``.
        start_of_string: keyword the upcoming line must start with
            (callers in this module pass e.g. 'msgid' or 'msgstr[').
        start_of_string_len: forwarded unchanged to ``parse_multiple_quoted``
            as its last argument (callers pass the offset just past the
            keyword).
        msg_list: list forwarded to ``parse_multiple_quoted``.
        msg_comment_list: optional list forwarded to
            ``parse_multiple_quoted``; a fresh list is used when omitted.

    Returns:
        Whatever ``parse_multiple_quoted`` returns when the keyword matches,
        otherwise None.
    """
    # Build a new list per call when none is supplied, rather than sharing one.
    comments = [] if msg_comment_list is None else msg_comment_list
    if not startswith(parse_state.next_line, start_of_string):
        # The upcoming line belongs to some other entry; leave it untouched.
        return None
    return parse_multiple_quoted(parse_state, msg_list, comments, start_of_string_len)
217
218
220 parse_message(parse_state, 'msgctxt', 7, unit.msgctxt)
221 return len(unit.msgctxt) > 0
222
223
225 parse_message(parse_state, 'msgid', 5, unit.msgid, unit.msgidcomments)
226 return len(unit.msgid) > 0 or len(unit.msgidcomments) > 0
227
228
230 parse_message(parse_state, 'msgstr', 6, unit.msgstr)
231 return len(unit.msgstr) > 0
232
233
237
# Length of the 'msgstr[' prefix, i.e. the offset at which the bracketed
# index begins on a 'msgstr[N]' line.
MSGSTR_ARRAY_ENTRY_LEN = len('msgstr[')


def add_to_dict(msgstr_dict, line, right_bracket_pos, entry):
    """Append *entry* to the list stored under the index found in *line*.

    The index is the integer between the 'msgstr[' prefix and the closing
    bracket at ``right_bracket_pos``.

    Args:
        msgstr_dict: dict mapping an integer index to a list; updated in
            place.
        line: the line starting with 'msgstr[' from which the index is read.
        right_bracket_pos: position of the closing ']' in *line*.
        entry: iterable of items to add to the list for that index.

    Raises:
        ValueError: if the text between the brackets is not an integer.
    """
    index = int(line[MSGSTR_ARRAY_ENTRY_LEN:right_bracket_pos])
    # setdefault creates the list the first time an index is seen and returns
    # the existing one otherwise, so repeated indices accumulate.
    msgstr_dict.setdefault(index, []).extend(entry)
246
247
def get_entry(parse_state, right_bracket_pos):
    """Collect the strings of the upcoming 'msgstr[' entry into a new list.

    Args:
        parse_state: parser state handed on to ``parse_message``.
        right_bracket_pos: position of the closing ']' on the upcoming line;
            the position just after it is passed on to ``parse_message``.

    Returns:
        A new list filled by ``parse_message``; it stays empty when
        ``parse_message`` adds nothing.
    """
    after_bracket = right_bracket_pos + 1
    collected = []
    parse_message(parse_state, 'msgstr[', after_bracket, collected)
    return collected
252
253
def parse_msgstr_array_entry(parse_state, msgstr_dict):
    """Try to parse one indexed 'msgstr[N]' entry into *msgstr_dict*.

    Args:
        parse_state: parser state; its ``next_line`` is examined and the
            object is passed on to ``get_entry``.
        msgstr_dict: dict updated through ``add_to_dict`` when an entry is
            found.

    Returns:
        True when a non-empty entry was read and stored, False otherwise.
    """
    # Keep the line as it is now: add_to_dict reads the index from it after
    # get_entry has been given the parser state.
    current_line = parse_state.next_line
    bracket_at = find(current_line, ']', MSGSTR_ARRAY_ENTRY_LEN)
    if bracket_at < 0:
        # No closing bracket after the 'msgstr[' prefix length.
        return False
    strings = get_entry(parse_state, bracket_at)
    if not strings:
        return False
    add_to_dict(msgstr_dict, current_line, bracket_at, strings)
    return True
266
267
277
278
285
286
294
295
297 unit = unit or parse_state.UnitClass()
298 parsed_comments = parse_comments(parse_state, unit)
299 obsolete_unit = parse_obsolete(parse_state, unit)
300 if obsolete_unit is not None:
301 return obsolete_unit
302 parsed_msg_entries = parse_msg_entries(parse_state, unit)
303 if parsed_comments or parsed_msg_entries:
304 unit.infer_state()
305 return unit
306 else:
307 return None
308
309
311 charset = None
312 if isinstance(unit.msgstr, list) and len(unit.msgstr) > 0 and isinstance(unit.msgstr[0], str):
313 charset = re.search("charset=([^\\s\\\\n]+)", "".join(unit.msgstr))
314 if charset:
315 encoding = charset.group(1)
316 if encoding != 'CHARSET':
317 store._encoding = encoding
318 else:
319 store._encoding = 'utf-8'
320 else:
321 store._encoding = 'utf-8'
322 parse_state.encoding = store._encoding
323
324
326 return [decode(item) for item in lst]
327
328
330 for attr in ('msgctxt', 'msgid', 'msgid_pluralcomments',
331 'msgid_plural', 'msgstr',
332 'othercomments', 'automaticcomments', 'sourcecomments',
333 'typecomments', 'msgidcomments'):
334 element = getattr(unit, attr)
335 if isinstance(element, list):
336 setattr(unit, attr, decode_list(element, decode))
337 else:
338 setattr(unit, attr, dict([(key, decode_list(value, decode)) for key, value in element.items()]))
339
340
348
349
351 unit = parse_header(parse_state, store)
352 while unit:
353 store.addunit(unit)
354 unit = parse_unit(parse_state)
355 return parse_state.eof
356