summaryrefslogtreecommitdiff
path: root/Lib/locale.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/locale.py')
-rw-r--r--Lib/locale.py175
1 files changed, 120 insertions, 55 deletions
diff --git a/Lib/locale.py b/Lib/locale.py
index d2a885d519..7cfea618d7 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -336,6 +336,22 @@ def _test():
# overridden below)
_setlocale = setlocale
+def _replace_encoding(code, encoding):
+ if '.' in code:
+ langname = code[:code.index('.')]
+ else:
+ langname = code
+ # Convert the encoding to a C lib compatible encoding string
+ norm_encoding = encodings.normalize_encoding(encoding)
+ #print('norm encoding: %r' % norm_encoding)
+ norm_encoding = encodings.aliases.aliases.get(norm_encoding,
+ norm_encoding)
+ #print('aliased encoding: %r' % norm_encoding)
+ encoding = locale_encoding_alias.get(norm_encoding,
+ norm_encoding)
+ #print('found encoding %r' % encoding)
+ return langname + '.' + encoding
+
def normalize(localename):
""" Returns a normalized locale code for the given locale
@@ -352,55 +368,71 @@ def normalize(localename):
does.
"""
- # Normalize the locale name and extract the encoding
- fullname = localename.lower()
- if ':' in fullname:
+ # Normalize the locale name and extract the encoding and modifier
+ code = localename.lower()
+ if ':' in code:
# ':' is sometimes used as encoding delimiter.
- fullname = fullname.replace(':', '.')
- if '.' in fullname:
- langname, encoding = fullname.split('.')[:2]
- fullname = langname + '.' + encoding
+ code = code.replace(':', '.')
+ if '@' in code:
+ code, modifier = code.split('@', 1)
else:
- langname = fullname
+ modifier = ''
+ if '.' in code:
+ langname, encoding = code.split('.')[:2]
+ else:
+ langname = code
encoding = ''
- # First lookup: fullname (possibly with encoding)
- norm_encoding = encoding.replace('-', '')
- norm_encoding = norm_encoding.replace('_', '')
- lookup_name = langname + '.' + encoding
+ # First lookup: fullname (possibly with encoding and modifier)
+ lang_enc = langname
+ if encoding:
+ norm_encoding = encoding.replace('-', '')
+ norm_encoding = norm_encoding.replace('_', '')
+ lang_enc += '.' + norm_encoding
+ lookup_name = lang_enc
+ if modifier:
+ lookup_name += '@' + modifier
code = locale_alias.get(lookup_name, None)
if code is not None:
return code
- #print 'first lookup failed'
-
- # Second try: langname (without encoding)
- code = locale_alias.get(langname, None)
- if code is not None:
- #print 'langname lookup succeeded'
- if '.' in code:
- langname, defenc = code.split('.')
- else:
- langname = code
- defenc = ''
- if encoding:
- # Convert the encoding to a C lib compatible encoding string
- norm_encoding = encodings.normalize_encoding(encoding)
- #print 'norm encoding: %r' % norm_encoding
- norm_encoding = encodings.aliases.aliases.get(norm_encoding,
- norm_encoding)
- #print 'aliased encoding: %r' % norm_encoding
- encoding = locale_encoding_alias.get(norm_encoding,
- norm_encoding)
- else:
- encoding = defenc
- #print 'found encoding %r' % encoding
- if encoding:
- return langname + '.' + encoding
- else:
- return langname
-
- else:
- return localename
+ #print('first lookup failed')
+
+ if modifier:
+ # Second try: fullname without modifier (possibly with encoding)
+ code = locale_alias.get(lang_enc, None)
+ if code is not None:
+ #print('lookup without modifier succeeded')
+ if '@' not in code:
+ return code + '@' + modifier
+ if code.split('@', 1)[1].lower() == modifier:
+ return code
+ #print('second lookup failed')
+
+ if encoding:
+ # Third try: langname (without encoding, possibly with modifier)
+ lookup_name = langname
+ if modifier:
+ lookup_name += '@' + modifier
+ code = locale_alias.get(lookup_name, None)
+ if code is not None:
+ #print('lookup without encoding succeeded')
+ if '@' not in code:
+ return _replace_encoding(code, encoding)
+ code, modifier = code.split('@', 1)
+ return _replace_encoding(code, encoding) + '@' + modifier
+
+ if modifier:
+ # Fourth try: langname (without encoding and modifier)
+ code = locale_alias.get(langname, None)
+ if code is not None:
+ #print('lookup without modifier and encoding succeeded')
+ if '@' not in code:
+ return _replace_encoding(code, encoding) + '@' + modifier
+ code, defmod = code.split('@', 1)
+ if defmod.lower() == modifier:
+ return _replace_encoding(code, encoding) + '@' + defmod
+
+ return localename
def _parse_localename(localename):
@@ -419,7 +451,7 @@ def _parse_localename(localename):
code = normalize(localename)
if '@' in code:
# Deal with locale modifiers
- code, modifier = code.split('@')
+ code, modifier = code.split('@', 1)
if modifier == 'euro' and '.' not in code:
# Assume Latin-9 for @euro locales. This is bogus,
# since some systems may use other encodings for these
@@ -743,11 +775,30 @@ locale_encoding_alias = {
# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
#
+# SS 2013-12-20:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 3.3.3
+# and older):
+#
+# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
+# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
+# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
+# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
+# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
+# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
locale_alias = {
- 'a3': 'a3_AZ.KOI8-C',
- 'a3_az': 'a3_AZ.KOI8-C',
- 'a3_az.koi8c': 'a3_AZ.KOI8-C',
+ 'a3': 'az_AZ.KOI8-C',
+ 'a3_az': 'az_AZ.KOI8-C',
+ 'a3_az.koi8c': 'az_AZ.KOI8-C',
+ 'a3_az.koic': 'az_AZ.KOI8-C',
'af': 'af_ZA.ISO8859-1',
'af_za': 'af_ZA.ISO8859-1',
'af_za.iso88591': 'af_ZA.ISO8859-1',
@@ -766,6 +817,7 @@ locale_alias = {
'ar_dz.iso88596': 'ar_DZ.ISO8859-6',
'ar_eg': 'ar_EG.ISO8859-6',
'ar_eg.iso88596': 'ar_EG.ISO8859-6',
+ 'ar_in': 'ar_IN.UTF-8',
'ar_iq': 'ar_IQ.ISO8859-6',
'ar_iq.iso88596': 'ar_IQ.ISO8859-6',
'ar_jo': 'ar_JO.ISO8859-6',
@@ -795,6 +847,7 @@ locale_alias = {
'arabic': 'ar_AA.ISO8859-6',
'arabic.iso88596': 'ar_AA.ISO8859-6',
'as': 'as_IN.UTF-8',
+ 'as_in': 'as_IN.UTF-8',
'az': 'az_AZ.ISO8859-9E',
'az_az': 'az_AZ.ISO8859-9E',
'az_az.iso88599e': 'az_AZ.ISO8859-9E',
@@ -812,6 +865,7 @@ locale_alias = {
'bg_bg.koi8r': 'bg_BG.KOI8-R',
'bg_bg.microsoftcp1251': 'bg_BG.CP1251',
'bn_in': 'bn_IN.UTF-8',
+ 'bo_in': 'bo_IN.UTF-8',
'bokmal': 'nb_NO.ISO8859-1',
'bokm\xe5l': 'nb_NO.ISO8859-1',
'br': 'br_FR.ISO8859-1',
@@ -829,6 +883,7 @@ locale_alias = {
'c': 'C',
'c-french': 'fr_CA.ISO8859-1',
'c-french.iso88591': 'fr_CA.ISO8859-1',
+ 'c.ascii': 'C',
'c.en': 'C',
'c.iso88591': 'en_US.ISO8859-1',
'c_c': 'C',
@@ -866,7 +921,7 @@ locale_alias = {
'croatian': 'hr_HR.ISO8859-2',
'cs': 'cs_CZ.ISO8859-2',
'cs_cs': 'cs_CZ.ISO8859-2',
- 'cs_cs.iso88592': 'cs_CS.ISO8859-2',
+ 'cs_cs.iso88592': 'cs_CZ.ISO8859-2',
'cs_cz': 'cs_CZ.ISO8859-2',
'cs_cz.iso88592': 'cs_CZ.ISO8859-2',
'cy': 'cy_GB.ISO8859-1',
@@ -1170,12 +1225,13 @@ locale_alias = {
'he_il.cp1255': 'he_IL.CP1255',
'he_il.iso88598': 'he_IL.ISO8859-8',
'he_il.microsoftcp1255': 'he_IL.CP1255',
- 'hebrew': 'iw_IL.ISO8859-8',
- 'hebrew.iso88598': 'iw_IL.ISO8859-8',
+ 'hebrew': 'he_IL.ISO8859-8',
+ 'hebrew.iso88598': 'he_IL.ISO8859-8',
'hi': 'hi_IN.ISCII-DEV',
'hi_in': 'hi_IN.ISCII-DEV',
'hi_in.isciidev': 'hi_IN.ISCII-DEV',
'hne': 'hne_IN.UTF-8',
+ 'hne_in': 'hne_IN.UTF-8',
'hr': 'hr_HR.ISO8859-2',
'hr_hr': 'hr_HR.ISO8859-2',
'hr_hr.iso88592': 'hr_HR.ISO8859-2',
@@ -1263,7 +1319,8 @@ locale_alias = {
'korean': 'ko_KR.eucKR',
'korean.euc': 'ko_KR.eucKR',
'ks': 'ks_IN.UTF-8',
- 'ks_in@devanagari': 'ks_IN@devanagari.UTF-8',
+ 'ks_in': 'ks_IN.UTF-8',
+ 'ks_in@devanagari': 'ks_IN.UTF-8@devanagari',
'kw': 'kw_GB.ISO8859-1',
'kw_gb': 'kw_GB.ISO8859-1',
'kw_gb.iso88591': 'kw_GB.ISO8859-1',
@@ -1287,6 +1344,7 @@ locale_alias = {
'lv_lv.iso885913': 'lv_LV.ISO8859-13',
'lv_lv.iso88594': 'lv_LV.ISO8859-4',
'mai': 'mai_IN.UTF-8',
+ 'mai_in': 'mai_IN.UTF-8',
'mi': 'mi_NZ.ISO8859-1',
'mi_nz': 'mi_NZ.ISO8859-1',
'mi_nz.iso88591': 'mi_NZ.ISO8859-1',
@@ -1296,6 +1354,7 @@ locale_alias = {
'mk_mk.iso88595': 'mk_MK.ISO8859-5',
'mk_mk.microsoftcp1251': 'mk_MK.CP1251',
'ml': 'ml_IN.UTF-8',
+ 'ml_in': 'ml_IN.UTF-8',
'mr': 'mr_IN.UTF-8',
'mr_in': 'mr_IN.UTF-8',
'ms': 'ms_MY.ISO8859-1',
@@ -1310,6 +1369,7 @@ locale_alias = {
'nb_no.iso88591': 'nb_NO.ISO8859-1',
'nb_no.iso885915': 'nb_NO.ISO8859-15',
'nb_no@euro': 'nb_NO.ISO8859-15',
+ 'ne_np': 'ne_NP.UTF-8',
'nl': 'nl_NL.ISO8859-1',
'nl.iso885915': 'nl_NL.ISO8859-15',
'nl_be': 'nl_BE.ISO8859-1',
@@ -1362,6 +1422,7 @@ locale_alias = {
'oc_fr.iso885915': 'oc_FR.ISO8859-15',
'oc_fr@euro': 'oc_FR.ISO8859-15',
'or': 'or_IN.UTF-8',
+ 'or_in': 'or_IN.UTF-8',
'pa': 'pa_IN.UTF-8',
'pa_in': 'pa_IN.UTF-8',
'pd': 'pd_US.ISO8859-1',
@@ -1423,7 +1484,10 @@ locale_alias = {
'rw': 'rw_RW.ISO8859-1',
'rw_rw': 'rw_RW.ISO8859-1',
'rw_rw.iso88591': 'rw_RW.ISO8859-1',
- 'sd': 'sd_IN@devanagari.UTF-8',
+ 'sd': 'sd_IN.UTF-8',
+ 'sd@devanagari': 'sd_IN.UTF-8@devanagari',
+ 'sd_in': 'sd_IN.UTF-8',
+ 'sd_in@devanagari': 'sd_IN.UTF-8@devanagari',
'se_no': 'se_NO.UTF-8',
'serbocroatian': 'sr_RS.UTF-8@latin',
'sh': 'sr_RS.UTF-8@latin',
@@ -1457,13 +1521,13 @@ locale_alias = {
'sr': 'sr_RS.UTF-8',
'sr@cyrillic': 'sr_RS.UTF-8',
'sr@latin': 'sr_RS.UTF-8@latin',
- 'sr@latn': 'sr_RS.UTF-8@latin',
- 'sr_cs': 'sr_RS.UTF-8',
+ 'sr@latn': 'sr_CS.UTF-8@latin',
+ 'sr_cs': 'sr_CS.UTF-8',
'sr_cs.iso88592': 'sr_CS.ISO8859-2',
'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
'sr_cs.iso88595': 'sr_CS.ISO8859-5',
- 'sr_cs.utf8@latn': 'sr_RS.UTF-8@latin',
- 'sr_cs@latn': 'sr_RS.UTF-8@latin',
+ 'sr_cs.utf8@latn': 'sr_CS.UTF-8@latin',
+ 'sr_cs@latn': 'sr_CS.UTF-8@latin',
'sr_me': 'sr_ME.UTF-8',
'sr_rs': 'sr_RS.UTF-8',
'sr_rs.utf8@latn': 'sr_RS.UTF-8@latin',
@@ -1541,6 +1605,7 @@ locale_alias = {
'universal': 'en_US.utf',
'universal.utf8@ucs4': 'en_US.UTF-8',
'ur': 'ur_PK.CP1256',
+ 'ur_in': 'ur_IN.UTF-8',
'ur_pk': 'ur_PK.CP1256',
'ur_pk.cp1256': 'ur_PK.CP1256',
'ur_pk.microsoftcp1256': 'ur_PK.CP1256',