1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 import re
32 import locale
33
34 from translate.storage import factory
35 from translate.storage.poheader import poheader
36 from translate.misc import optrecurse
37 from translate.misc.multistring import multistring
38 from translate.lang import data
39
40
42 """Just a small data structure that represents a search match."""
43
44
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where a search hit was found.

    :param unit: the translation unit the match belongs to
    :param part: name of the unit part that matched; the accessors in this
                 file recognise 'target', 'source', 'notes' and 'locations'
    :param part_n: index used to pick one string out of that part (plural
                   form, note or location)
    :param start: start position of the match, stored as given
    :param end: end position of the match, stored as given
    """
    self.unit = unit
    self.part = part
    self.part_n = part_n
    self.start = start
    self.end = end
51
52
# NOTE(review): the `def` line that opens this body is missing from this copy
# of the file (the listing numbers jump from 52 to 54) and the indentation has
# been flattened; the statements are kept verbatim.
# The body builds and returns a zero-argument callable that reads the matched
# string out of self.unit, chosen by self.part.
54 if self.part == 'target':
# Plural units hold one string per plural form; self.part_n selects the form.
55 if self.unit.hasplural():
56 getter = lambda: self.unit.target.strings[self.part_n]
57 else:
58 getter = lambda: self.unit.target
59 return getter
60 elif self.part == 'source':
61 if self.unit.hasplural():
62 getter = lambda: self.unit.source.strings[self.part_n]
63 else:
64 getter = lambda: self.unit.source
65 return getter
66 elif self.part == 'notes':
67
# NOTE(review): this indexes the result of getnotes() with self.part_n, which
# presumes getnotes() yields a sequence of notes - confirm against the storage
# API, since a plain string would yield a single character here.
68 def getter():
69 return self.unit.getnotes()[self.part_n]
70 return getter
71 elif self.part == 'locations':
72
73 def getter():
74 return self.unit.getlocations()[self.part_n]
75 return getter
# No branch for any other value of self.part is visible here.
76
# NOTE(review): the `def` line that opens this body is missing from this copy
# of the file (the listing numbers jump from 76 to 78) and the indentation has
# been flattened; the statements are kept verbatim.
# The body builds a one-argument callable that writes a new value into the
# unit's target. Only the 'target' part is handled in the visible code.
78 if self.part == 'target':
79 if self.unit.hasplural():
80
# Plural target: replace the form at self.part_n, then assign the whole list
# of strings back to unit.target.
81 def setter(value):
82 strings = self.unit.target.strings
83 strings[self.part_n] = value
84 self.unit.target = strings
85 else:
86
# Singular target: plain assignment.
87 def setter(value):
88 self.unit.target = value
89 return setter
90
91
100
103
104
# NOTE(review): the `def` line that opens this body is missing from this copy
# of the file (the listing numbers jump from 104 to 106); the body reads the
# names `string` and `nfc_index`, presumably its parameters.
106 """Calculate the real index in the unnormalized string that corresponds to
107 the index nfc_index in the normalized string."""
# Start with a prefix of nfc_index characters and grow it one character at a
# time while its normalized form is still no longer than nfc_index.
108 length = nfc_index
109 max_length = len(string)
110 while len(data.normalize(string[:length])) <= nfc_index:
# The whole string has been consumed: its full length is the answer.
111 if length == max_length:
112 return length
113 length += 1
# The loop stops one character past the wanted position, so step back by one.
114 return length - 1
115
116
129
130
132
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8',
             max_matches=0):
    """Configure a search filter.

    :param searchstring: text or regular expression to look for; byte
                         strings are decoded using *encoding*
    :param searchparts: collection of part names to search ('source',
                        'target', 'notes', 'locations', or the Gettext
                        aliases 'msgid', 'msgstr', 'comment'); when empty
                        or None, source and target are searched
    :param ignorecase: match case-insensitively
    :param useregexp: treat *searchstring* as a regular expression
    :param invertmatch: select strings that do NOT match
    :param accelchar: accelerator marker to strip from text before matching
    :param encoding: encoding used to decode a byte-string *searchstring*
    :param max_matches: upper bound on collected matches (0 means no limit)

    After construction ``self.searchstring`` holds the normalized search
    text. For plain-text, case-insensitive searches it is lower-cased; a
    regular expression is never lower-cased (that would turn escapes such
    as ``\\S`` or ``\\D`` into their opposites) and is compiled with
    ``re.IGNORECASE`` instead.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so anything
    # that is not already text gets decoded.
    if not isinstance(searchstring, type(u'')):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # Gettext terminology is accepted as an alias for each part name.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        flags = 0
        if self.ignorecase:
            # re.UNICODE keeps case folding correct for non-ASCII text.
            flags = re.IGNORECASE | re.UNICODE
        self.searchpattern = re.compile(self.searchstring, flags)
    elif self.ignorecase:
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.max_matches = max_matches
163
def matches(self, teststr):
    """Tell whether *teststr* is selected by this filter.

    The text is normalized, lower-cased when ``self.ignorecase`` is set and
    stripped of accelerator markers when ``self.accelchar`` is set, then
    tested against the search string (substring test) or the compiled
    pattern (regex search). ``self.invertmatch`` flips the result.

    :param teststr: the text to test, or None
    :return: True or False; None input always gives False, even when the
             match is inverted
    """
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # A doubled accelerator stands for a literal character and is
        # replaced by a placeholder; a single one is just a marker and is
        # dropped. Literal replacement avoids treating the accelerator as
        # a regular expression.
        teststr = teststr.replace(self.accelchar + self.accelchar, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        found = self.searchpattern.search(teststr) is not None
    else:
        found = self.searchstring in teststr
    if self.invertmatch:
        found = not found
    return found
180
# NOTE(review): the `def` line that opens this body is missing from this copy
# of the file (the listing numbers jump from 180 to 182); the body reads the
# name `unit`, presumably its parameter.
182 """runs filters on an element"""
# NOTE(review): header units yield an empty list here while every other exit
# returns True or False; both are falsy, but the return type is inconsistent.
183 if unit.isheader():
184 return []
185
# Each enabled part is tested in turn; the first string accepted by
# self.matches() makes the whole unit a hit.
186 if self.search_source:
# A multistring carries one string per plural form; test each of them.
187 if isinstance(unit.source, multistring):
188 strings = unit.source.strings
189 else:
190 strings = [unit.source]
191 for string in strings:
192 if self.matches(string):
193 return True
194
195 if self.search_target:
196 if isinstance(unit.target, multistring):
197 strings = unit.target.strings
198 else:
199 strings = [unit.target]
200 for string in strings:
201 if self.matches(string):
202 return True
203
# Notes are passed to self.matches() as returned by getnotes().
204 if self.search_notes:
205 if self.matches(unit.getnotes()):
206 return True
# Locations are joined with single spaces and tested as one string.
207 if self.search_locations:
208 if self.matches(u" ".join(unit.getlocations())):
209 return True
210 return False
211
224
# NOTE(review): the `def` line that opens this body is missing from this copy
# of the file (the listing numbers jump from 224 to 226); the body reads the
# name `units`, presumably its parameter.
# The body collects the results of find_matches() over every enabled part of
# every unit and returns them together with the indexes of the units that
# produced at least one match.
# An empty search string yields two empty lists.
226 if not self.searchstring:
227 return [], []
228
229 searchstring = self.searchstring
# NOTE(review): on Python 3 re.LOCALE cannot be used with a str pattern or
# together with re.UNICODE - confirm the targeted interpreter version.
230 flags = re.LOCALE | re.MULTILINE | re.UNICODE
231
232 if self.ignorecase:
233 flags |= re.IGNORECASE
# Plain-text searches are escaped so they match literally.
234 if not self.useregexp:
235 searchstring = re.escape(searchstring)
# The whole expression is wrapped in one capturing group and kept on self.
236 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
237
238 matches = []
239 indexes = []
240
241 for index, unit in enumerate(units):
# Remember how many matches existed before this unit was examined.
242 old_length = len(matches)
243
244 if self.search_target:
245 if unit.hasplural():
246 targets = unit.target.strings
247 else:
248 targets = [unit.target]
249 matches.extend(find_matches(unit, 'target', targets, self.re_search))
250 if self.search_source:
251 if unit.hasplural():
252 sources = unit.source.strings
253 else:
254 sources = [unit.source]
255 matches.extend(find_matches(unit, 'source', sources, self.re_search))
256 if self.search_notes:
257 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
258
259 if self.search_locations:
260 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
261
262
263
264
# A max_matches of 0 is falsy and therefore disables this limit.
265 if self.max_matches and len(matches) > self.max_matches:
266 raise Exception("Too many matches found")
267
# The unit index is recorded once if the unit added any match.
# NOTE(review): the assignment to old_length below looks redundant, since the
# value is assigned again at the top of the loop before it is next read.
268 if len(matches) > old_length:
269 old_length = len(matches)
270 indexes.append(index)
271
272 return matches, indexes
273
274
276 """a specialized Option Parser for the grep tool..."""
277
def parse_args(self, args=None, values=None):
    """Parse the command line, handling implicit input/output arguments.

    The first free-standing argument is always the search string. Any
    remaining free-standing arguments fill in ``options.input`` and
    ``options.output`` when those were not given as options: the last one
    becomes the output and the ones before it the input.

    :param args: argument list handed to ``optparse`` (None lets optparse
                 pick its default)
    :param values: optional values object handed to ``optparse``
    :return: ``(options, args)``, with *args* empty on success
    """
    (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)

    if args:
        options.searchstring = args[0]
        args = args[1:]
    else:
        self.error("At least one argument must be given for the search string")

    if args and not options.input:
        if not options.output:
            # Output still has to come from the free args: keep the last
            # one for it and treat everything before it as input.
            options.input = args[:-1]
            args = args[-1:]
        else:
            options.input = args
            args = []
    if args and not options.output:
        options.output = args[-1]
        args = args[:-1]
    if args:
        self.error("You have used an invalid combination of --input, --output and freestanding args")

    # A single input is passed on as a plain value rather than a list.
    if isinstance(options.input, list) and len(options.input) == 1:
        options.input = options.input[0]
    return (options, args)
302
def set_usage(self, usage=None):
    """Set the usage string.

    :param usage: explicit usage text; when None, a usage line is built
                  from ``getusagestring()`` of every option in
                  ``self.option_list``, prefixed with the mandatory
                  ``searchstring`` argument
    """
    if usage is None:
        option_usage = [self.getusagestring(option) for option in self.option_list]
        self.usage = "%prog searchstring " + " ".join(option_usage)
    else:
        super(GrepOptionParser, self).set_usage(usage)
309
318
319
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Read *inputfile*, filter it with *checkfilter* and write the result.

    :param inputfile: source handed to ``factory.getobject`` to build the
                      store to search
    :param outputfile: object with a ``write`` method that receives the
                       serialized result
    :param templatefile: not used by this function
    :param checkfilter: filter object whose ``filterfile`` method produces
                        the store of matching units
    :return: True when something was written, False when the filtered
             store was empty (nothing is written in that case)
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
328
329
def cmdlineparser():
    """Build the command line parser for the grep tool.

    Every supported input extension is handled by ``rungrep`` and written
    back in the same format; input without a recognised extension is
    treated as PO.

    :return: a configured ``GrepOptionParser``
    """
    formats = {"po": ("po", rungrep), "pot": ("pot", rungrep),
               "mo": ("mo", rungrep), "gmo": ("gmo", rungrep),
               "tmx": ("tmx", rungrep),
               "xliff": ("xliff", rungrep), "xlf": ("xlf", rungrep), "xlff": ("xlff", rungrep),
               None: ("po", rungrep)}
    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
                      action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
                      metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
                      action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
                      action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
                      action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
                      action="store", type="choice", choices=["&", "_", "~"],
                      metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    # Rebuild the usage line now that all options are registered.
    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
353
354
def main():
    """Build the command line parser and run it."""
    parser = cmdlineparser()
    parser.run()


if __name__ == '__main__':
    main()
362