diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-05-30 23:55:00 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-05-30 23:55:00 +0000 |
commit | 1d588b4fde447d38c978a11b8e89ae8b5436f2e8 (patch) | |
tree | 19736a02d812ebf470ce113a419588545f51bfc2 /enc/unicode/case-folding.rb | |
parent | 0148bd15e4928582adebc4afe4e18db30b68a5a6 (diff) | |
download | ruby-1d588b4fde447d38c978a11b8e89ae8b5436f2e8.tar.gz |
case-folding.rb: modularize
* enc/unicode/case-folding.rb (CaseFolding): modularize, and add
--output-file option.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46267 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/unicode/case-folding.rb')
-rwxr-xr-x | enc/unicode/case-folding.rb | 194 |
1 files changed, 114 insertions, 80 deletions
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb index e39bef20f8..09fc719ba9 100755 --- a/enc/unicode/case-folding.rb +++ b/enc/unicode/case-folding.rb @@ -4,104 +4,138 @@ # $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt # $ ruby CaseFolding.rb CaseFolding.txt > ../enc/unicode/casefold.h +class CaseFolding + module Util + module_function -def hex_seq(v) - v.map {|i| "0x%04x" % i}.join(", ") -end - -def print_table(table, data) - print("static const #{table}[] = {\n") - for k, v in data.sort - if Array === k and k.length > 1 - sk = "{#{hex_seq(k)}}" - else - sk = "0x%04x" % k - end - print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n") + def hex_seq(v) + v.map {|i| "0x%04x" % i}.join(", ") end - print("};\n\n") -end - -def print_case_folding_data(filename) - pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/ - - fold = {} - unfold = [{}, {}, {}] - turkic = [] - IO.foreach(filename) do |line| - next unless res = pattern.match(line) - ch_from = res[1].to_i(16) - ch_to = [] - - if res[2] == 'T' - # Turkic case folding - turkic << ch_from - next + def print_table_1(dest, data) + for k, v in data.sort + sk = (Array === k and k.length > 1) ? "{#{hex_seq(k)}}" : ("0x%04x" % k) + dest.print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n") + end end - # store folding data - (3..6).each do |i| - if res[i] - ch_to << res[i].to_i(16) + def print_table(dest, type, data) + data.each do |n, d| + dest.print("static const #{type}_Type #{n}[] = {\n") + print_table_1(dest, d) + dest.print("};\n\n") end end - fold[ch_from] = ch_to - - # store unfolding data - i = ch_to.length - 1 - (unfold[i][ch_to] ||= []) << ch_from - end - - # move locale dependent data to (un)fold_locale - fold_locale = {} - unfold_locale = [{}, {}] - for ch_from in turkic - key = fold[ch_from] - i = key.length - 1 - unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key) - fold_locale[ch_from] = fold.delete(ch_from) end - # print the header - print("/* DO NOT EDIT THIS FILE. */\n") - print("/* Generated by tool/CaseFolding.py */\n\n") + include Util - # print folding data + attr_reader :fold, :fold_locale, :unfold, :unfold_locale - # CaseFold - print_table("CaseFold_11_Type CaseFold", fold) + def load(filename) + pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/ - # CaseFold_Locale - print_table("CaseFold_11_Type CaseFold_Locale", fold_locale) + @fold = fold = {} + @unfold = unfold = [{}, {}, {}] + turkic = [] - # print unfolding data + IO.foreach(filename) do |line| + next unless res = pattern.match(line) + ch_from = res[1].to_i(16) - # CaseUnfold_11 - print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0]) + if res[2] == 'T' + # Turkic case folding + turkic << ch_from + next + end - # CaseUnfold_11_Locale - print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0]) + # store folding data + ch_to = res[3..6].inject([]) do |a, i| + break a unless i + a << i.to_i(16) + end + fold[ch_from] = ch_to - # CaseUnfold_12 - print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1]) + # store unfolding data + i = ch_to.length - 1 + (unfold[i][ch_to] ||= []) << ch_from + end - # CaseUnfold_12_Locale - print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1]) + # move locale dependent data to (un)fold_locale + @fold_locale = fold_locale = {} + @unfold_locale = unfold_locale = [{}, {}] + for ch_from in turkic + key = fold[ch_from] + i = key.length - 1 + unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key) + fold_locale[ch_from] = fold.delete(ch_from) + end + self + end - # CaseUnfold_13 - print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2]) + def display(dest) + # print the header + dest.print("/* DO NOT EDIT THIS FILE. */\n") + dest.print("/* Generated by enc/unicode/case-folding.rb */\n\n") + + # print folding data + + # CaseFold + CaseFold_Locale + name = "CaseFold_11" + print_table(dest, name, "CaseFold"=>fold) + print_table(dest, name, "CaseFold_Locale"=>fold_locale) + + # print unfolding data + + # CaseUnfold_11 + CaseUnfold_11_Locale + name = "CaseUnfold_11" + print_table(dest, name, name=>unfold[0]) + print_table(dest, name, "#{name}_Locale"=>unfold_locale[0]) + + # CaseUnfold_12 + CaseUnfold_12_Locale + name = "CaseUnfold_12" + print_table(dest, name, name=>unfold[1]) + print_table(dest, name, "#{name}_Locale"=>unfold_locale[1]) + + # CaseUnfold_13 + name = "CaseUnfold_13" + print_table(dest, name, name=>unfold[2]) + + # table sizes + fold_table_size = fold.size + fold_locale.size + dest.printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2)) + unfold1_table_size = unfold[0].size + unfold_locale[0].size + dest.printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2)) + unfold2_table_size = unfold[1].size + unfold_locale[1].size + dest.printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5)) + unfold3_table_size = unfold[2].size + dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7)) + end - # table sizes - fold_table_size = fold.size + fold_locale.size - printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2)) - unfold1_table_size = unfold[0].size + unfold_locale[0].size - printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2)) - unfold2_table_size = unfold[1].size + unfold_locale[1].size - printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5)) - unfold3_table_size = unfold[2].size - printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7)) + def self.load(*args) + new.load(*args) + end end -filename = ARGV[0] || 'CaseFolding.txt' -print_case_folding_data(filename) +if $0 == __FILE__ + require 'optparse' + dest = nil + fold_1 = false + ARGV.options do |opt| + opt.banner << " [INPUT]" + opt.on("--output-file=FILE", "-o", "output to the FILE instead of STDOUT") {|output| + dest = (output unless output == '-') + } + opt.parse! + abort(opt.to_s) if ARGV.size > 1 + end + filename = ARGV[0] || 'CaseFolding.txt' + data = CaseFolding.load(filename) + if dest + open(dest, "wb") do |f| + data.display(f) + end + else + data.display(STDOUT) + end +end |