summary | refs | log | tree | commit | diff
path: root/enc/unicode/case-folding.rb
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-05-30 23:55:00 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-05-30 23:55:00 +0000
commit: 1d588b4fde447d38c978a11b8e89ae8b5436f2e8 (patch)
tree: 19736a02d812ebf470ce113a419588545f51bfc2 /enc/unicode/case-folding.rb
parent: 0148bd15e4928582adebc4afe4e18db30b68a5a6 (diff)
download: ruby-1d588b4fde447d38c978a11b8e89ae8b5436f2e8.tar.gz
case-folding.rb: modularize
* enc/unicode/case-folding.rb (CaseFolding): modularize, and add --output-file option.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46267 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/unicode/case-folding.rb')
-rwxr-xr-x enc/unicode/case-folding.rb | 194
1 files changed, 114 insertions, 80 deletions
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index e39bef20f8..09fc719ba9 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -4,104 +4,138 @@
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
# $ ruby CaseFolding.rb CaseFolding.txt > ../enc/unicode/casefold.h
+class CaseFolding
+ module Util
+ module_function
-def hex_seq(v)
- v.map {|i| "0x%04x" % i}.join(", ")
-end
-
-def print_table(table, data)
- print("static const #{table}[] = {\n")
- for k, v in data.sort
- if Array === k and k.length > 1
- sk = "{#{hex_seq(k)}}"
- else
- sk = "0x%04x" % k
- end
- print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
+ def hex_seq(v)
+ v.map {|i| "0x%04x" % i}.join(", ")
end
- print("};\n\n")
-end
-
-def print_case_folding_data(filename)
- pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/
-
- fold = {}
- unfold = [{}, {}, {}]
- turkic = []
- IO.foreach(filename) do |line|
- next unless res = pattern.match(line)
- ch_from = res[1].to_i(16)
- ch_to = []
-
- if res[2] == 'T'
- # Turkic case folding
- turkic << ch_from
- next
+ def print_table_1(dest, data)
+ for k, v in data.sort
+ sk = (Array === k and k.length > 1) ? "{#{hex_seq(k)}}" : ("0x%04x" % k)
+ dest.print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
+ end
end
- # store folding data
- (3..6).each do |i|
- if res[i]
- ch_to << res[i].to_i(16)
+ def print_table(dest, type, data)
+ data.each do |n, d|
+ dest.print("static const #{type}_Type #{n}[] = {\n")
+ print_table_1(dest, d)
+ dest.print("};\n\n")
end
end
- fold[ch_from] = ch_to
-
- # store unfolding data
- i = ch_to.length - 1
- (unfold[i][ch_to] ||= []) << ch_from
- end
-
- # move locale dependent data to (un)fold_locale
- fold_locale = {}
- unfold_locale = [{}, {}]
- for ch_from in turkic
- key = fold[ch_from]
- i = key.length - 1
- unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
- fold_locale[ch_from] = fold.delete(ch_from)
end
- # print the header
- print("/* DO NOT EDIT THIS FILE. */\n")
- print("/* Generated by tool/CaseFolding.py */\n\n")
+ include Util
- # print folding data
+ attr_reader :fold, :fold_locale, :unfold, :unfold_locale
- # CaseFold
- print_table("CaseFold_11_Type CaseFold", fold)
+ def load(filename)
+ pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/
- # CaseFold_Locale
- print_table("CaseFold_11_Type CaseFold_Locale", fold_locale)
+ @fold = fold = {}
+ @unfold = unfold = [{}, {}, {}]
+ turkic = []
- # print unfolding data
+ IO.foreach(filename) do |line|
+ next unless res = pattern.match(line)
+ ch_from = res[1].to_i(16)
- # CaseUnfold_11
- print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0])
+ if res[2] == 'T'
+ # Turkic case folding
+ turkic << ch_from
+ next
+ end
- # CaseUnfold_11_Locale
- print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0])
+ # store folding data
+ ch_to = res[3..6].inject([]) do |a, i|
+ break a unless i
+ a << i.to_i(16)
+ end
+ fold[ch_from] = ch_to
- # CaseUnfold_12
- print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1])
+ # store unfolding data
+ i = ch_to.length - 1
+ (unfold[i][ch_to] ||= []) << ch_from
+ end
- # CaseUnfold_12_Locale
- print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1])
+ # move locale dependent data to (un)fold_locale
+ @fold_locale = fold_locale = {}
+ @unfold_locale = unfold_locale = [{}, {}]
+ for ch_from in turkic
+ key = fold[ch_from]
+ i = key.length - 1
+ unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
+ fold_locale[ch_from] = fold.delete(ch_from)
+ end
+ self
+ end
- # CaseUnfold_13
- print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2])
+ def display(dest)
+ # print the header
+ dest.print("/* DO NOT EDIT THIS FILE. */\n")
+ dest.print("/* Generated by enc/unicode/case-folding.rb */\n\n")
+
+ # print folding data
+
+ # CaseFold + CaseFold_Locale
+ name = "CaseFold_11"
+ print_table(dest, name, "CaseFold"=>fold)
+ print_table(dest, name, "CaseFold_Locale"=>fold_locale)
+
+ # print unfolding data
+
+ # CaseUnfold_11 + CaseUnfold_11_Locale
+ name = "CaseUnfold_11"
+ print_table(dest, name, name=>unfold[0])
+ print_table(dest, name, "#{name}_Locale"=>unfold_locale[0])
+
+ # CaseUnfold_12 + CaseUnfold_12_Locale
+ name = "CaseUnfold_12"
+ print_table(dest, name, name=>unfold[1])
+ print_table(dest, name, "#{name}_Locale"=>unfold_locale[1])
+
+ # CaseUnfold_13
+ name = "CaseUnfold_13"
+ print_table(dest, name, name=>unfold[2])
+
+ # table sizes
+ fold_table_size = fold.size + fold_locale.size
+ dest.printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
+ unfold1_table_size = unfold[0].size + unfold_locale[0].size
+ dest.printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
+ unfold2_table_size = unfold[1].size + unfold_locale[1].size
+ dest.printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
+ unfold3_table_size = unfold[2].size
+ dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
+ end
- # table sizes
- fold_table_size = fold.size + fold_locale.size
- printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
- unfold1_table_size = unfold[0].size + unfold_locale[0].size
- printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
- unfold2_table_size = unfold[1].size + unfold_locale[1].size
- printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
- unfold3_table_size = unfold[2].size
- printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
+ def self.load(*args)
+ new.load(*args)
+ end
end
-filename = ARGV[0] || 'CaseFolding.txt'
-print_case_folding_data(filename)
+if $0 == __FILE__
+ require 'optparse'
+ dest = nil
+ fold_1 = false
+ ARGV.options do |opt|
+ opt.banner << " [INPUT]"
+ opt.on("--output-file=FILE", "-o", "output to the FILE instead of STDOUT") {|output|
+ dest = (output unless output == '-')
+ }
+ opt.parse!
+ abort(opt.to_s) if ARGV.size > 1
+ end
+ filename = ARGV[0] || 'CaseFolding.txt'
+ data = CaseFolding.load(filename)
+ if dest
+ open(dest, "wb") do |f|
+ data.display(f)
+ end
+ else
+ data.display(STDOUT)
+ end
+end