summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Aarni Koskela <akx@iki.fi>, 2016-02-14 21:45:19 +0200
committer: Aarni Koskela <akx@iki.fi>, 2016-02-14 21:45:19 +0200
commit: e59cfde41a970bd71c9dabc8cc72c6f845e7f009 (patch)
tree: 59f9bed7fd88ce9726975ad91149870e153790b0 /scripts
parent: 69d1b1265e68eacaff3e587e7a5fc398837a0fea (diff)
download: babel-e59cfde41a970bd71c9dabc8cc72c6f845e7f009.tar.gz
import_cldr: refactor locale data parsing into multiple functions
Special care was taken to order the new functions in the same order as the original source was; the resulting whitespace-ignoring diff is very minimal.
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/import_cldr.py 687
1 files changed, 361 insertions, 326 deletions
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 1b2db2c..0deca82 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -365,346 +365,381 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False):
if locale_id in ordinal_rules:
data['ordinal_form'] = ordinal_rules[locale_id]
- # <localeDisplayNames>
+ parse_locale_display_names(data, tree)
- territories = data.setdefault('territories', {})
- for elem in tree.findall('.//territories/territory'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib['type'] in territories:
- continue
- territories[elem.attrib['type']] = _text(elem)
+ parse_dates(data, tree, sup, regions, territory)
- languages = data.setdefault('languages', {})
- for elem in tree.findall('.//languages/language'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib['type'] in languages:
+ for calendar in tree.findall('.//calendars/calendar'):
+ if calendar.attrib['type'] != 'gregorian':
+ # TODO: support other calendar types
continue
- languages[elem.attrib['type']] = _text(elem)
- variants = data.setdefault('variants', {})
- for elem in tree.findall('.//variants/variant'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib['type'] in variants:
- continue
- variants[elem.attrib['type']] = _text(elem)
+ parse_calendar_months(data, calendar)
+ parse_calendar_days(data, calendar)
+ parse_calendar_quarters(data, calendar)
+ parse_calendar_eras(data, calendar)
+ parse_calendar_periods(data, calendar)
+ parse_calendar_date_formats(data, calendar)
+ parse_calendar_time_formats(data, calendar)
+ parse_calendar_datetime_skeletons(data, calendar)
+ parse_interval_formats(data, calendar)
- scripts = data.setdefault('scripts', {})
- for elem in tree.findall('.//scripts/script'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib['type'] in scripts:
- continue
- scripts[elem.attrib['type']] = _text(elem)
+ parse_number_symbols(data, tree)
+ parse_decimal_formats(data, tree)
+ parse_scientific_formats(data, tree)
+ parse_percent_formats(data, tree)
- list_patterns = data.setdefault('list_patterns', {})
- for listType in tree.findall('.//listPatterns/listPattern'):
- if 'type' in listType.attrib:
- continue
- for listPattern in listType.findall('listPatternPart'):
- list_patterns[listPattern.attrib['type']] = _text(listPattern)
-
- # <dates>
-
- week_data = data.setdefault('week_data', {})
- supelem = sup.find('.//weekData')
-
- for elem in supelem.findall('minDays'):
- territories = elem.attrib['territories'].split()
- if territory in territories or any([r in territories for r in regions]):
- week_data['min_days'] = int(elem.attrib['count'])
-
- for elem in supelem.findall('firstDay'):
- territories = elem.attrib['territories'].split()
- if territory in territories or any([r in territories for r in regions]):
- week_data['first_day'] = weekdays[elem.attrib['day']]
-
- for elem in supelem.findall('weekendStart'):
- territories = elem.attrib['territories'].split()
- if territory in territories or any([r in territories for r in regions]):
- week_data['weekend_start'] = weekdays[elem.attrib['day']]
-
- for elem in supelem.findall('weekendEnd'):
- territories = elem.attrib['territories'].split()
- if territory in territories or any([r in territories for r in regions]):
- week_data['weekend_end'] = weekdays[elem.attrib['day']]
-
- zone_formats = data.setdefault('zone_formats', {})
- for elem in tree.findall('.//timeZoneNames/gmtFormat'):
- if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
- zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
- break
- for elem in tree.findall('.//timeZoneNames/regionFormat'):
- if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
- zone_formats['region'] = text_type(elem.text).replace('{0}', '%s')
- break
- for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
- if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
- zone_formats['fallback'] = text_type(elem.text) \
- .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
- break
- for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
- if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
- zone_formats['fallback_region'] = text_type(elem.text) \
- .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
- break
-
- time_zones = data.setdefault('time_zones', {})
- for elem in tree.findall('.//timeZoneNames/zone'):
- info = {}
- city = elem.findtext('exemplarCity')
- if city:
- info['city'] = text_type(city)
- for child in elem.findall('long/*'):
- info.setdefault('long', {})[child.tag] = text_type(child.text)
- for child in elem.findall('short/*'):
- info.setdefault('short', {})[child.tag] = text_type(child.text)
- time_zones[elem.attrib['type']] = info
-
- meta_zones = data.setdefault('meta_zones', {})
- for elem in tree.findall('.//timeZoneNames/metazone'):
- info = {}
- city = elem.findtext('exemplarCity')
- if city:
- info['city'] = text_type(city)
- for child in elem.findall('long/*'):
- info.setdefault('long', {})[child.tag] = text_type(child.text)
- for child in elem.findall('short/*'):
- info.setdefault('short', {})[child.tag] = text_type(child.text)
- meta_zones[elem.attrib['type']] = info
+ parse_currency_formats(data, tree)
+ parse_currency_names(data, tree)
+ parse_unit_patterns(data, tree)
+ parse_date_fields(data, tree)
+
+ write_datafile(data_filename, data, dump_json=dump_json)
+
+
+def parse_locale_display_names(data, tree):
+ territories = data.setdefault('territories', {})
+ for elem in tree.findall('.//territories/territory'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in territories:
+ continue
+ territories[elem.attrib['type']] = _text(elem)
+ languages = data.setdefault('languages', {})
+ for elem in tree.findall('.//languages/language'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in languages:
+ continue
+ languages[elem.attrib['type']] = _text(elem)
+ variants = data.setdefault('variants', {})
+ for elem in tree.findall('.//variants/variant'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in variants:
+ continue
+ variants[elem.attrib['type']] = _text(elem)
+ scripts = data.setdefault('scripts', {})
+ for elem in tree.findall('.//scripts/script'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib['type'] in scripts:
+ continue
+ scripts[elem.attrib['type']] = _text(elem)
+ list_patterns = data.setdefault('list_patterns', {})
+ for listType in tree.findall('.//listPatterns/listPattern'):
+ if 'type' in listType.attrib:
+ continue
+ for listPattern in listType.findall('listPatternPart'):
+ list_patterns[listPattern.attrib['type']] = _text(listPattern)
+
+
+def parse_dates(data, tree, sup, regions, territory):
+ week_data = data.setdefault('week_data', {})
+ supelem = sup.find('.//weekData')
+ for elem in supelem.findall('minDays'):
+ territories = elem.attrib['territories'].split()
+ if territory in territories or any([r in territories for r in regions]):
+ week_data['min_days'] = int(elem.attrib['count'])
+ for elem in supelem.findall('firstDay'):
+ territories = elem.attrib['territories'].split()
+ if territory in territories or any([r in territories for r in regions]):
+ week_data['first_day'] = weekdays[elem.attrib['day']]
+ for elem in supelem.findall('weekendStart'):
+ territories = elem.attrib['territories'].split()
+ if territory in territories or any([r in territories for r in regions]):
+ week_data['weekend_start'] = weekdays[elem.attrib['day']]
+ for elem in supelem.findall('weekendEnd'):
+ territories = elem.attrib['territories'].split()
+ if territory in territories or any([r in territories for r in regions]):
+ week_data['weekend_end'] = weekdays[elem.attrib['day']]
+ zone_formats = data.setdefault('zone_formats', {})
+ for elem in tree.findall('.//timeZoneNames/gmtFormat'):
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+ zone_formats['gmt'] = text_type(elem.text).replace('{0}', '%s')
+ break
+ for elem in tree.findall('.//timeZoneNames/regionFormat'):
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+ zone_formats['region'] = text_type(elem.text).replace('{0}', '%s')
+ break
+ for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+ zone_formats['fallback'] = text_type(elem.text) \
+ .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+ break
+ for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
+ if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+ zone_formats['fallback_region'] = text_type(elem.text) \
+ .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+ break
+ time_zones = data.setdefault('time_zones', {})
+ for elem in tree.findall('.//timeZoneNames/zone'):
+ info = {}
+ city = elem.findtext('exemplarCity')
+ if city:
+ info['city'] = text_type(city)
+ for child in elem.findall('long/*'):
+ info.setdefault('long', {})[child.tag] = text_type(child.text)
+ for child in elem.findall('short/*'):
+ info.setdefault('short', {})[child.tag] = text_type(child.text)
+ time_zones[elem.attrib['type']] = info
+ meta_zones = data.setdefault('meta_zones', {})
+ for elem in tree.findall('.//timeZoneNames/metazone'):
+ info = {}
+ city = elem.findtext('exemplarCity')
+ if city:
+ info['city'] = text_type(city)
+ for child in elem.findall('long/*'):
+ info.setdefault('long', {})[child.tag] = text_type(child.text)
+ for child in elem.findall('short/*'):
+ info.setdefault('short', {})[child.tag] = text_type(child.text)
+ meta_zones[elem.attrib['type']] = info
+
+
+def parse_calendar_months(data, calendar):
+ months = data.setdefault('months', {})
+ for ctxt in calendar.findall('months/monthContext'):
+ ctxt_type = ctxt.attrib['type']
+ ctxts = months.setdefault(ctxt_type, {})
+ for width in ctxt.findall('monthWidth'):
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'month':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib.get('type'))] = \
+ text_type(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['months', ctxt_type, width_type],
+ elem.attrib['path'])
+ )
- for calendar in tree.findall('.//calendars/calendar'):
- if calendar.attrib['type'] != 'gregorian':
- # TODO: support other calendar types
- continue
- months = data.setdefault('months', {})
- for ctxt in calendar.findall('months/monthContext'):
- ctxt_type = ctxt.attrib['type']
- ctxts = months.setdefault(ctxt_type, {})
- for width in ctxt.findall('monthWidth'):
- width_type = width.attrib['type']
- widths = ctxts.setdefault(width_type, {})
- for elem in width.getiterator():
- if elem.tag == 'month':
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib.get('type'))] = \
- text_type(elem.text)
- elif elem.tag == 'alias':
- ctxts[width_type] = Alias(
- _translate_alias(['months', ctxt_type, width_type],
- elem.attrib['path'])
- )
-
- days = data.setdefault('days', {})
- for ctxt in calendar.findall('days/dayContext'):
- ctxt_type = ctxt.attrib['type']
- ctxts = days.setdefault(ctxt_type, {})
- for width in ctxt.findall('dayWidth'):
- width_type = width.attrib['type']
- widths = ctxts.setdefault(width_type, {})
- for elem in width.getiterator():
- if elem.tag == 'day':
- dtype = weekdays[elem.attrib['type']]
- if ('draft' in elem.attrib or
+def parse_calendar_days(data, calendar):
+ days = data.setdefault('days', {})
+ for ctxt in calendar.findall('days/dayContext'):
+ ctxt_type = ctxt.attrib['type']
+ ctxts = days.setdefault(ctxt_type, {})
+ for width in ctxt.findall('dayWidth'):
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'day':
+ dtype = weekdays[elem.attrib['type']]
+ if ('draft' in elem.attrib or
'alt' not in elem.attrib) \
- and dtype in widths:
- continue
- widths[dtype] = text_type(elem.text)
- elif elem.tag == 'alias':
- ctxts[width_type] = Alias(
- _translate_alias(['days', ctxt_type, width_type],
- elem.attrib['path'])
- )
-
- quarters = data.setdefault('quarters', {})
- for ctxt in calendar.findall('quarters/quarterContext'):
- ctxt_type = ctxt.attrib['type']
- ctxts = quarters.setdefault(ctxt.attrib['type'], {})
- for width in ctxt.findall('quarterWidth'):
- width_type = width.attrib['type']
- widths = ctxts.setdefault(width_type, {})
- for elem in width.getiterator():
- if elem.tag == 'quarter':
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib['type'])] = text_type(elem.text)
- elif elem.tag == 'alias':
- ctxts[width_type] = Alias(
- _translate_alias(['quarters', ctxt_type,
- width_type],
- elem.attrib['path']))
-
- eras = data.setdefault('eras', {})
- for width in calendar.findall('eras/*'):
- width_type = NAME_MAP[width.tag]
- widths = eras.setdefault(width_type, {})
- for elem in width.getiterator():
- if elem.tag == 'era':
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and int(elem.attrib['type']) in widths:
- continue
- widths[int(elem.attrib.get('type'))] = text_type(elem.text)
- elif elem.tag == 'alias':
- eras[width_type] = Alias(
- _translate_alias(['eras', width_type],
- elem.attrib['path'])
- )
-
- # AM/PM
- periods = data.setdefault('periods', {})
- for day_period_width in calendar.findall(
- 'dayPeriods/dayPeriodContext/dayPeriodWidth'):
- if day_period_width.attrib['type'] == 'wide':
- for day_period in day_period_width.findall('dayPeriod'):
- if 'alt' not in day_period.attrib:
- periods[day_period.attrib['type']] = text_type(
- day_period.text)
-
- date_formats = data.setdefault('date_formats', {})
- for format in calendar.findall('dateFormats'):
- for elem in format.getiterator():
- if elem.tag == 'dateFormatLength':
- if 'draft' in elem.attrib and \
- elem.attrib.get('type') in date_formats:
- continue
- try:
- date_formats[elem.attrib.get('type')] = \
- dates.parse_pattern(text_type(
- elem.findtext('dateFormat/pattern')))
- except ValueError as e:
- error(e)
- elif elem.tag == 'alias':
- date_formats = Alias(_translate_alias(
- ['date_formats'], elem.attrib['path'])
- )
-
- time_formats = data.setdefault('time_formats', {})
- for format in calendar.findall('timeFormats'):
- for elem in format.getiterator():
- if elem.tag == 'timeFormatLength':
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib.get('type') in time_formats:
- continue
- try:
- time_formats[elem.attrib.get('type')] = \
- dates.parse_pattern(text_type(
- elem.findtext('timeFormat/pattern')))
- except ValueError as e:
- error(e)
- elif elem.tag == 'alias':
- time_formats = Alias(_translate_alias(
- ['time_formats'], elem.attrib['path'])
- )
-
- datetime_formats = data.setdefault('datetime_formats', {})
- datetime_skeletons = data.setdefault('datetime_skeletons', {})
- for format in calendar.findall('dateTimeFormats'):
- for elem in format.getiterator():
- if elem.tag == 'dateTimeFormatLength':
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib.get('type') in datetime_formats:
- continue
- try:
- datetime_formats[elem.attrib.get('type')] = \
- text_type(elem.findtext('dateTimeFormat/pattern'))
- except ValueError as e:
- error(e)
- elif elem.tag == 'alias':
- datetime_formats = Alias(_translate_alias(
- ['datetime_formats'], elem.attrib['path'])
- )
- elif elem.tag == 'availableFormats':
- for datetime_skeleton in elem.findall('dateFormatItem'):
- datetime_skeletons[datetime_skeleton.attrib['id']] = \
- dates.parse_pattern(text_type(datetime_skeleton.text))
+ and dtype in widths:
+ continue
+ widths[dtype] = text_type(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['days', ctxt_type, width_type],
+ elem.attrib['path'])
+ )
- parse_interval_formats(data, calendar)
- # <numbers>
+def parse_calendar_quarters(data, calendar):
+ quarters = data.setdefault('quarters', {})
+ for ctxt in calendar.findall('quarters/quarterContext'):
+ ctxt_type = ctxt.attrib['type']
+ ctxts = quarters.setdefault(ctxt.attrib['type'], {})
+ for width in ctxt.findall('quarterWidth'):
+ width_type = width.attrib['type']
+ widths = ctxts.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'quarter':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib['type'])] = text_type(elem.text)
+ elif elem.tag == 'alias':
+ ctxts[width_type] = Alias(
+ _translate_alias(['quarters', ctxt_type,
+ width_type],
+ elem.attrib['path']))
+
+
+def parse_calendar_eras(data, calendar):
+ eras = data.setdefault('eras', {})
+ for width in calendar.findall('eras/*'):
+ width_type = NAME_MAP[width.tag]
+ widths = eras.setdefault(width_type, {})
+ for elem in width.getiterator():
+ if elem.tag == 'era':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and int(elem.attrib['type']) in widths:
+ continue
+ widths[int(elem.attrib.get('type'))] = text_type(elem.text)
+ elif elem.tag == 'alias':
+ eras[width_type] = Alias(
+ _translate_alias(['eras', width_type],
+ elem.attrib['path'])
+ )
+
+
+def parse_calendar_periods(data, calendar):
+ # AM/PM
+ periods = data.setdefault('periods', {})
+ for day_period_width in calendar.findall(
+ 'dayPeriods/dayPeriodContext/dayPeriodWidth'):
+ if day_period_width.attrib['type'] == 'wide':
+ for day_period in day_period_width.findall('dayPeriod'):
+ if 'alt' not in day_period.attrib:
+ periods[day_period.attrib['type']] = text_type(
+ day_period.text)
+
+
+def parse_calendar_date_formats(data, calendar):
+ date_formats = data.setdefault('date_formats', {})
+ for format in calendar.findall('dateFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'dateFormatLength':
+ if 'draft' in elem.attrib and \
+ elem.attrib.get('type') in date_formats:
+ continue
+ try:
+ date_formats[elem.attrib.get('type')] = \
+ dates.parse_pattern(text_type(
+ elem.findtext('dateFormat/pattern')))
+ except ValueError as e:
+ error(e)
+ elif elem.tag == 'alias':
+ date_formats = Alias(_translate_alias(
+ ['date_formats'], elem.attrib['path'])
+ )
+
+
+def parse_calendar_time_formats(data, calendar):
+ time_formats = data.setdefault('time_formats', {})
+ for format in calendar.findall('timeFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'timeFormatLength':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in time_formats:
+ continue
+ try:
+ time_formats[elem.attrib.get('type')] = \
+ dates.parse_pattern(text_type(
+ elem.findtext('timeFormat/pattern')))
+ except ValueError as e:
+ error(e)
+ elif elem.tag == 'alias':
+ time_formats = Alias(_translate_alias(
+ ['time_formats'], elem.attrib['path'])
+ )
+
+
+def parse_calendar_datetime_skeletons(data, calendar):
+ datetime_formats = data.setdefault('datetime_formats', {})
+ datetime_skeletons = data.setdefault('datetime_skeletons', {})
+ for format in calendar.findall('dateTimeFormats'):
+ for elem in format.getiterator():
+ if elem.tag == 'dateTimeFormatLength':
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in datetime_formats:
+ continue
+ try:
+ datetime_formats[elem.attrib.get('type')] = \
+ text_type(elem.findtext('dateTimeFormat/pattern'))
+ except ValueError as e:
+ error(e)
+ elif elem.tag == 'alias':
+ datetime_formats = Alias(_translate_alias(
+ ['datetime_formats'], elem.attrib['path'])
+ )
+ elif elem.tag == 'availableFormats':
+ for datetime_skeleton in elem.findall('dateFormatItem'):
+ datetime_skeletons[datetime_skeleton.attrib['id']] = \
+ dates.parse_pattern(text_type(datetime_skeleton.text))
+
+
+def parse_number_symbols(data, tree):
+ number_symbols = data.setdefault('number_symbols', {})
+ for elem in tree.findall('.//numbers/symbols/*'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib):
+ continue
+ number_symbols[elem.tag] = text_type(elem.text)
- number_symbols = data.setdefault('number_symbols', {})
- for elem in tree.findall('.//numbers/symbols/*'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib):
- continue
- number_symbols[elem.tag] = text_type(elem.text)
- decimal_formats = data.setdefault('decimal_formats', {})
- for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib.get('type') in decimal_formats:
- continue
- if elem.findall('./alias'):
- # TODO map the alias to its target
- continue
- pattern = text_type(elem.findtext('./decimalFormat/pattern'))
- decimal_formats[elem.attrib.get('type')] = \
- numbers.parse_pattern(pattern)
-
- scientific_formats = data.setdefault('scientific_formats', {})
- for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib.get('type') in scientific_formats:
- continue
- pattern = text_type(elem.findtext('scientificFormat/pattern'))
- scientific_formats[elem.attrib.get('type')] = \
- numbers.parse_pattern(pattern)
+def parse_decimal_formats(data, tree):
+ decimal_formats = data.setdefault('decimal_formats', {})
+ for elem in tree.findall('.//decimalFormats/decimalFormatLength'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in decimal_formats:
+ continue
+ if elem.findall('./alias'):
+ # TODO map the alias to its target
+ continue
+ pattern = text_type(elem.findtext('./decimalFormat/pattern'))
+ decimal_formats[elem.attrib.get('type')] = \
+ numbers.parse_pattern(pattern)
- parse_currency_formats(data, tree)
- percent_formats = data.setdefault('percent_formats', {})
- for elem in tree.findall('.//percentFormats/percentFormatLength'):
- if ('draft' in elem.attrib or 'alt' in elem.attrib) \
- and elem.attrib.get('type') in percent_formats:
- continue
- pattern = text_type(elem.findtext('percentFormat/pattern'))
- percent_formats[elem.attrib.get('type')] = \
- numbers.parse_pattern(pattern)
-
- currency_names = data.setdefault('currency_names', {})
- currency_names_plural = data.setdefault('currency_names_plural', {})
- currency_symbols = data.setdefault('currency_symbols', {})
- for elem in tree.findall('.//currencies/currency'):
- code = elem.attrib['type']
- for name in elem.findall('displayName'):
- if ('draft' in name.attrib) and code in currency_names:
- continue
- if 'count' in name.attrib:
- currency_names_plural.setdefault(code, {})[
- name.attrib['count']] = text_type(name.text)
- else:
- currency_names[code] = text_type(name.text)
- # TODO: support choice patterns for currency symbol selection
- symbol = elem.find('symbol')
- if symbol is not None and 'draft' not in symbol.attrib \
- and 'choice' not in symbol.attrib:
- currency_symbols[code] = text_type(symbol.text)
-
- # <units>
-
- unit_patterns = data.setdefault('unit_patterns', {})
- for elem in tree.findall('.//units/unitLength'):
- unit_length_type = elem.attrib['type']
- for unit in elem.findall('unit'):
- unit_type = unit.attrib['type']
- for pattern in unit.findall('unitPattern'):
- box = unit_type
- box += ':' + unit_length_type
- unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \
- text_type(pattern.text)
-
- date_fields = data.setdefault('date_fields', {})
- for elem in tree.findall('.//dates/fields/field'):
- field_type = elem.attrib['type']
- date_fields.setdefault(field_type, {})
- for rel_time in elem.findall('relativeTime'):
- rel_time_type = rel_time.attrib['type']
- for pattern in rel_time.findall('relativeTimePattern'):
- date_fields[field_type].setdefault(rel_time_type, {})\
- [pattern.attrib['count']] = text_type(pattern.text)
+def parse_scientific_formats(data, tree):
+ scientific_formats = data.setdefault('scientific_formats', {})
+ for elem in tree.findall('.//scientificFormats/scientificFormatLength'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in scientific_formats:
+ continue
+ pattern = text_type(elem.findtext('scientificFormat/pattern'))
+ scientific_formats[elem.attrib.get('type')] = \
+ numbers.parse_pattern(pattern)
- write_datafile(data_filename, data, dump_json=dump_json)
+
+def parse_percent_formats(data, tree):
+ percent_formats = data.setdefault('percent_formats', {})
+ for elem in tree.findall('.//percentFormats/percentFormatLength'):
+ if ('draft' in elem.attrib or 'alt' in elem.attrib) \
+ and elem.attrib.get('type') in percent_formats:
+ continue
+ pattern = text_type(elem.findtext('percentFormat/pattern'))
+ percent_formats[elem.attrib.get('type')] = \
+ numbers.parse_pattern(pattern)
+
+
+def parse_currency_names(data, tree):
+ currency_names = data.setdefault('currency_names', {})
+ currency_names_plural = data.setdefault('currency_names_plural', {})
+ currency_symbols = data.setdefault('currency_symbols', {})
+ for elem in tree.findall('.//currencies/currency'):
+ code = elem.attrib['type']
+ for name in elem.findall('displayName'):
+ if ('draft' in name.attrib) and code in currency_names:
+ continue
+ if 'count' in name.attrib:
+ currency_names_plural.setdefault(code, {})[
+ name.attrib['count']] = text_type(name.text)
+ else:
+ currency_names[code] = text_type(name.text)
+ # TODO: support choice patterns for currency symbol selection
+ symbol = elem.find('symbol')
+ if symbol is not None and 'draft' not in symbol.attrib \
+ and 'choice' not in symbol.attrib:
+ currency_symbols[code] = text_type(symbol.text)
+
+
+def parse_unit_patterns(data, tree):
+ unit_patterns = data.setdefault('unit_patterns', {})
+ for elem in tree.findall('.//units/unitLength'):
+ unit_length_type = elem.attrib['type']
+ for unit in elem.findall('unit'):
+ unit_type = unit.attrib['type']
+ for pattern in unit.findall('unitPattern'):
+ box = unit_type
+ box += ':' + unit_length_type
+ unit_patterns.setdefault(box, {})[pattern.attrib['count']] = \
+ text_type(pattern.text)
+
+
+def parse_date_fields(data, tree):
+ date_fields = data.setdefault('date_fields', {})
+ for elem in tree.findall('.//dates/fields/field'):
+ field_type = elem.attrib['type']
+ date_fields.setdefault(field_type, {})
+ for rel_time in elem.findall('relativeTime'):
+ rel_time_type = rel_time.attrib['type']
+ for pattern in rel_time.findall('relativeTimePattern'):
+ date_fields[field_type].setdefault(rel_time_type, {}) \
+ [pattern.attrib['count']] = text_type(pattern.text)
def parse_interval_formats(data, tree):
@@ -738,8 +773,8 @@ def parse_currency_formats(data, tree):
for child in elem.getiterator():
if child.tag == 'alias':
currency_formats[type] = Alias(
- _translate_alias(['currency_formats', elem.attrib['type']],
- child.attrib['path'])
+ _translate_alias(['currency_formats', elem.attrib['type']],
+ child.attrib['path'])
)
elif child.tag == 'pattern':
pattern = text_type(child.text)