1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""This module stores information and functionality that relates to languages:
their names, plural forms, language codes and text normalization."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'an': (u'Aragonese', 2, '(n != 1)'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    # The equation yields the indices 0..3, so there are four plural forms.
    'gd': (u'Gaelic; Scottish Gaelic', 4, '(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : (n > 2 && n < 20) ? 2 : 3'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'ia': (u"Interlingua (International Auxiliary Language Association)", 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'kk': (u'Kazakh', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mai': (u'Maithili', 2, '(n != 1)'),
    'mfe': (u'Morisyen', 2, '(n > 1)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n != 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'rm': (u'Romansh', 2, '(n != 1)'),
    # No trailing ';' here: the equation is an expression, like every other entry.
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'tt': (u'Tatar', 1, '0'),
    'ug': (u'Uighur; Uyghur', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),
    # There is no bare 'zh' entry: Chinese is only listed per country.
    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).

The plural equation is a bare C-style expression in C{n}: it carries neither
a C{nplurals=...; plural=} prefix nor a trailing semicolon.

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
165
166 _fixed_names = {
167 u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
168 u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
169 u"Catalan; Valencian": u"Catalan",
170 u"Central Khmer": u"Khmer",
171 u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
172 u"Divehi; Dhivehi; Maldivian": u"Divehi",
173 u"Dutch; Flemish": u"Dutch",
174 u"Filipino; Pilipino": u"Filipino",
175 u"Gaelic; Scottish Gaelic": u"Scottish Gaelic",
176 u"Greek, Modern (1453-)": u"Greek",
177 u"Interlingua (International Auxiliary Language Association)": u"Interlingua",
178 u"Kirghiz; Kyrgyz": u"Kirghiz",
179 u"Klingon; tlhIngan-Hol": u"Klingon",
180 u"Limburgan; Limburger; Limburgish": u"Limburgish",
181 u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
182 u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
183 u"Ndebele, South; South Ndebele": u"Southern Ndebele",
184 u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
185 u"Occitan (post 1500)": u"Occitan",
186 u"Panjabi; Punjabi": u"Punjabi",
187 u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
188 u"Pushto; Pashto": u"Pashto",
189 u"Sinhala; Sinhalese": u"Sinhala",
190 u"Sotho, Southern": u"Sotho",
191 u"Spanish; Castilian": u"Spanish",
192 u"Uighur; Uyghur": u"Uighur",
193 }
194
195
# NOTE(review): the ``def`` line was missing from the reviewed text; the name
# and parameter are restored from the call in simplify_to_common() and from
# the body - confirm against the repository.
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    Everything from the last separator onwards is dropped, so one call
    removes one trailing component.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify.
    @return: C{code} itself when it is empty or C{None}, the shortened code
        when a separator is found, and C{""} when there is nothing left to
        strip.
    """
    if not code:
        return code

    # The separator is located in the normalized form but the slice is taken
    # from the original, which keeps the caller's spelling.
    # NOTE(review): this presumes normalize_code() rewrites every separator
    # to "-" without changing the length of the string - confirm.
    separator = normalize_code(code).rfind('-')
    if separator >= 0:
        return code[:separator]
    return ""
215
216
# Keyed by language code; note that the factor may be negative.
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors."""
225
226 import gettext
227 import locale
228 import re
229 import os
230
# Both dictionaries start out empty and are filled on demand by
# gettext_lang() and gettext_country(), keyed by language code.
iso639 = {}
"""ISO 639 language codes"""
iso3166 = {}
"""ISO 3166 country codes"""

# A whole language code: 2-3 lowercase letters, an optional "_"/"-" plus
# 2-3 uppercase letters, and an optional "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# The same shape, matched without regard to case.
langcode_ire = re.compile("^[a-z]{2,3}([_-][a-z]{2,3})?(@[a-z0-9]+)?$", re.IGNORECASE)
# Only the part that follows the language: "_XX" or "-XX" plus an optional
# "@modifier".
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
239
240
# NOTE(review): the ``def`` line was missing from the reviewed text. The
# parameter names come from the body; the function name is not referenced
# anywhere in the visible code - confirm both against the repository.
def languagematch(languagecode, otherlanguagecode):
    """Matches a language code to another, ignoring regions in the second.

    @param languagecode: The code to look for, or C{None} to only check that
        C{otherlanguagecode} has the shape of a language code.
    @param otherlanguagecode: The code to test, which may carry a region
        and/or modifier.
    @return: A true value on a match, a false value otherwise. The result is
        not always a C{bool}: it can be a regex match object or C{None}.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    # Otherwise the second code must start with the first, and whatever
    # follows must be a well-formed variant suffix.
    return (otherlanguagecode.startswith(languagecode) and
            variant_re.match(otherlanguagecode[len(languagecode):]))
247
# Splits a name of the form "Language (Country)" into its two parts. The
# parenthesised part may hold at most 25 characters and no digits.
dialect_name_re = re.compile(
    r"(.+)\s\(([^)\d]{,25})\)$"
)
249
250
251
252
# NOTE(review): the ``def`` line was missing from the reviewed text; the name
# is taken from the ``languages`` documentation and the optional parameter
# from the docstring and body - confirm against the repository.
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: Code of the language to translate names into.
    @return: A function taking one language name and returning its
        translation after it has been passed through _fix_language_name().
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialect_name_re.match(name)
        if not match:
            return _fix_language_name(langfunc(name))
        # "Language (Country)": each half is translated with its own
        # function and the two are put back together in the same layout.
        language, country = match.groups()
        return u"%s (%s)" % (_fix_language_name(langfunc(language)), countryfunc(country))

    return handlelanguage
268
269
# NOTE(review): the ``def`` line was missing from the reviewed text; name and
# parameter are restored from the calls in tr_lang() and from the body -
# confirm against the repository.
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    If the name is present in _fixed_names we assume it is untranslated and
    we replace it with a more usable rendering. If the remaining part is long
    and includes a semi-colon, we only take the text up to the semi-colon to
    keep things neat.

    @param name: The language name to tidy up.
    @return: The replacement from _fixed_names, the text before the first
        semi-colon found at index 5 or later of a name longer than 11
        characters, or C{name} unchanged.
    """
    if name in _fixed_names:
        return _fixed_names[name]
    if len(name) > 11:
        # NOTE(review): the thresholds 11 and 5 look like heuristics; their
        # origin is not visible here. A semi-colon within the first five
        # characters is deliberately ignored.
        split_point = name.find(u';', 5)
        if split_point >= 0:
            return name[:split_point]
    return name
286
287
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso639} under the language code; the empty
    string is the key used for the system language.

    @param langcode: Code of the target language, or a false value (C{None}
        or C{""}) for the system language.
    @return: A callable taking a language name and returning its translation.
        Catalogs are opened with C{fallback=True}, so a missing catalog gives
        a callable that returns the name unchanged.
    """
    # Normalise the key before looking in the cache: None and "" both mean
    # "system language" and have to share a single entry, otherwise every
    # call without a language would build a new translation object.
    langcode = langcode or ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is handed to gettext explicitly.
            # It can be unknown (None); fall back to gettext's own lookup
            # then instead of passing None on as a language.
            default_locale = locale.getdefaultlocale()[0]
            t = gettext.translation('iso_639',
                                    languages=[default_locale] if default_locale else None,
                                    fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso639[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso639[langcode]
303
304
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso3166} under the language code; the empty
    string is the key used for the system language.

    @param langcode: Code of the target language, or a false value (C{None}
        or C{""}) for the system language.
    @return: A callable taking a country name and returning its translation.
        Catalogs are opened with C{fallback=True}, so a missing catalog gives
        a callable that returns the name unchanged.
    """
    # Normalise the key before looking in the cache: None and "" both mean
    # "system language" and have to share a single entry, otherwise every
    # call without a language would build a new translation object.
    langcode = langcode or ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is handed to gettext explicitly.
            # It can be unknown (None); fall back to gettext's own lookup
            # then instead of passing None on as a language.
            default_locale = locale.getdefaultlocale()[0]
            t = gettext.translation('iso_3166',
                                    languages=[default_locale] if default_locale else None,
                                    fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso3166[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso3166[langcode]
320
321
# NOTE(review): the ``def`` line was missing from the reviewed text; the name
# comes from the call in the function that follows and the ``NFC`` default
# from the docstring - confirm against the repository.
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or C{None} if C{string} is C{None}
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
333
334
# NOTE(review): the ``def`` line was missing from the reviewed text; name and
# parameter are restored from the call in the function that follows and from
# the body - confirm against the repository.
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # ``bytes`` is the same type as ``str`` on Python 2, so this is the
    # original check there, and it still singles out byte strings on Python 3.
    if isinstance(string, bytes):
        # A byte-string subclass may name its own encoding; UTF-8 otherwise.
        encoding = getattr(string, "encoding", "utf-8")
        return string.decode(encoding)
    if isinstance(string, StringElem):
        try:
            text_type = unicode  # Python 2
        except NameError:
            text_type = str  # Python 3
        return text_type(string)
    # Anything else is returned as it came in.
    return string
351
352
# NOTE(review): the ``def`` line was missing from the reviewed text. The
# parameter name comes from the body; the function name is not referenced
# anywhere in the visible code - confirm both against the repository.
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: The string to convert; it is passed through
        forceunicode() and then normalize() with its default form.
    @return: The result of normalize().
    """
    return normalize(forceunicode(string))
356
357
362
363
# NOTE(review): the ``def`` line was missing from the reviewed text. The name
# and first parameter come from the recursive call; the ``languages``
# parameter is assumed from the body's use of that name and defaults to the
# module-level table, so calls with one argument behave as before - confirm
# against the repository.
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify.
    @param languages: Mapping whose keys are the known language codes.
    @return: C{language_code} if it is a known code (compared in normalized
        form) or cannot be simplified any further, otherwise the result of
        simplifying the shortened code.
    """
    simpler = simplercode(language_code)
    # Nothing left to strip. simplercode() hands back None for None, which
    # previously led to endless recursion, so any false value stops here.
    if not simpler:
        return language_code
    wanted = normalize_code(language_code)
    if any(normalize_code(key) == wanted for key in languages):
        return language_code
    # Retry with one component less, against the same table.
    return simplify_to_common(simpler, languages)
373