summary | refs | log | tree | commit | diff
path: root/enc
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-27 08:06:17 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-27 08:06:17 +0000
commit: c4e6964141ef8ec7f42cc8ded29434b3a235c0ae (patch)
tree: 7c08517242778a5c48b85511a23c16ce9d370866 /enc
parent: bafa7f90efe9138cf505776192603a49667c58a2 (diff)
download: ruby-c4e6964141ef8ec7f42cc8ded29434b3a235c0ae.tar.gz
* enc/unicode/case-folding.rb, casefold.h: Reducing size of TitleCase
  table by eliminating duplicates. (with Kimihito Matsui)

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53957 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rwxr-xr-x enc/unicode/case-folding.rb 7
-rw-r--r-- enc/unicode/casefold.h 210
2 files changed, 92 insertions, 125 deletions
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index dfb5f47314..cd14b6e6f1 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -238,8 +238,11 @@ class CaseMapping
flags += '|U' if to==item.upper
flags += '|D' if to==item.lower
unless item.upper == item.title
- flags += "|T(#{@titlecase.length})"
- @titlecase << item
+ unless title_index = @titlecase.find_index { |i| i.title==item.title }
+ title_index = @titlecase.length
+ @titlecase << item
+ end
+ flags += "|T(#{title_index})"
end
end
flags
diff --git a/enc/unicode/casefold.h b/enc/unicode/casefold.h
index ab45e4362c..6e7140cf7f 100644
--- a/enc/unicode/casefold.h
+++ b/enc/unicode/casefold.h
@@ -161,11 +161,11 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x01b8, {1|F|D, {0x01b9}}},
{0x01bc, {1|F|D, {0x01bd}}},
{0x01c4, {1|F|D|T(1), {0x01c6}}},
- {0x01c5, {1|F|D|T(2), {0x01c6}}},
- {0x01c7, {1|F|D|T(3), {0x01c9}}},
- {0x01c8, {1|F|D|T(4), {0x01c9}}},
- {0x01ca, {1|F|D|T(5), {0x01cc}}},
- {0x01cb, {1|F|D|T(6), {0x01cc}}},
+ {0x01c5, {1|F|D|T(1), {0x01c6}}},
+ {0x01c7, {1|F|D|T(2), {0x01c9}}},
+ {0x01c8, {1|F|D|T(2), {0x01c9}}},
+ {0x01ca, {1|F|D|T(3), {0x01cc}}},
+ {0x01cb, {1|F|D|T(3), {0x01cc}}},
{0x01cd, {1|F|D, {0x01ce}}},
{0x01cf, {1|F|D, {0x01d0}}},
{0x01d1, {1|F|D, {0x01d2}}},
@@ -184,8 +184,8 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x01ec, {1|F|D, {0x01ed}}},
{0x01ee, {1|F|D, {0x01ef}}},
{0x01f0, {2|F, {0x006a, 0x030c}}},
- {0x01f1, {1|F|D|T(7), {0x01f3}}},
- {0x01f2, {1|F|D|T(8), {0x01f3}}},
+ {0x01f1, {1|F|D|T(4), {0x01f3}}},
+ {0x01f2, {1|F|D|T(4), {0x01f3}}},
{0x01f4, {1|F|D, {0x01f5}}},
{0x01f6, {1|F|D, {0x0195}}},
{0x01f7, {1|F|D, {0x01bf}}},
@@ -486,7 +486,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0554, {1|F|D, {0x0584}}},
{0x0555, {1|F|D, {0x0585}}},
{0x0556, {1|F|D, {0x0586}}},
- {0x0587, {2|F|T(9), {0x0565, 0x0582}}},
+ {0x0587, {2|F|T(5), {0x0565, 0x0582}}},
{0x10a0, {1|F|D, {0x2d00}}},
{0x10a1, {1|F|D, {0x2d01}}},
{0x10a2, {1|F|D, {0x2d02}}},
@@ -715,75 +715,75 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x1f6d, {1|F|D, {0x1f65}}},
{0x1f6e, {1|F|D, {0x1f66}}},
{0x1f6f, {1|F|D, {0x1f67}}},
- {0x1f80, {2|F|T(10), {0x1f00, 0x03b9}}},
- {0x1f81, {2|F|T(11), {0x1f01, 0x03b9}}},
- {0x1f82, {2|F|T(12), {0x1f02, 0x03b9}}},
- {0x1f83, {2|F|T(13), {0x1f03, 0x03b9}}},
- {0x1f84, {2|F|T(14), {0x1f04, 0x03b9}}},
- {0x1f85, {2|F|T(15), {0x1f05, 0x03b9}}},
- {0x1f86, {2|F|T(16), {0x1f06, 0x03b9}}},
- {0x1f87, {2|F|T(17), {0x1f07, 0x03b9}}},
- {0x1f88, {2|F|T(18), {0x1f00, 0x03b9}}},
- {0x1f89, {2|F|T(19), {0x1f01, 0x03b9}}},
- {0x1f8a, {2|F|T(20), {0x1f02, 0x03b9}}},
- {0x1f8b, {2|F|T(21), {0x1f03, 0x03b9}}},
- {0x1f8c, {2|F|T(22), {0x1f04, 0x03b9}}},
- {0x1f8d, {2|F|T(23), {0x1f05, 0x03b9}}},
- {0x1f8e, {2|F|T(24), {0x1f06, 0x03b9}}},
- {0x1f8f, {2|F|T(25), {0x1f07, 0x03b9}}},
- {0x1f90, {2|F|T(26), {0x1f20, 0x03b9}}},
- {0x1f91, {2|F|T(27), {0x1f21, 0x03b9}}},
- {0x1f92, {2|F|T(28), {0x1f22, 0x03b9}}},
- {0x1f93, {2|F|T(29), {0x1f23, 0x03b9}}},
- {0x1f94, {2|F|T(30), {0x1f24, 0x03b9}}},
- {0x1f95, {2|F|T(31), {0x1f25, 0x03b9}}},
- {0x1f96, {2|F|T(32), {0x1f26, 0x03b9}}},
- {0x1f97, {2|F|T(33), {0x1f27, 0x03b9}}},
- {0x1f98, {2|F|T(34), {0x1f20, 0x03b9}}},
- {0x1f99, {2|F|T(35), {0x1f21, 0x03b9}}},
- {0x1f9a, {2|F|T(36), {0x1f22, 0x03b9}}},
- {0x1f9b, {2|F|T(37), {0x1f23, 0x03b9}}},
- {0x1f9c, {2|F|T(38), {0x1f24, 0x03b9}}},
- {0x1f9d, {2|F|T(39), {0x1f25, 0x03b9}}},
- {0x1f9e, {2|F|T(40), {0x1f26, 0x03b9}}},
- {0x1f9f, {2|F|T(41), {0x1f27, 0x03b9}}},
- {0x1fa0, {2|F|T(42), {0x1f60, 0x03b9}}},
- {0x1fa1, {2|F|T(43), {0x1f61, 0x03b9}}},
- {0x1fa2, {2|F|T(44), {0x1f62, 0x03b9}}},
- {0x1fa3, {2|F|T(45), {0x1f63, 0x03b9}}},
- {0x1fa4, {2|F|T(46), {0x1f64, 0x03b9}}},
- {0x1fa5, {2|F|T(47), {0x1f65, 0x03b9}}},
- {0x1fa6, {2|F|T(48), {0x1f66, 0x03b9}}},
- {0x1fa7, {2|F|T(49), {0x1f67, 0x03b9}}},
- {0x1fa8, {2|F|T(50), {0x1f60, 0x03b9}}},
- {0x1fa9, {2|F|T(51), {0x1f61, 0x03b9}}},
- {0x1faa, {2|F|T(52), {0x1f62, 0x03b9}}},
- {0x1fab, {2|F|T(53), {0x1f63, 0x03b9}}},
- {0x1fac, {2|F|T(54), {0x1f64, 0x03b9}}},
- {0x1fad, {2|F|T(55), {0x1f65, 0x03b9}}},
- {0x1fae, {2|F|T(56), {0x1f66, 0x03b9}}},
- {0x1faf, {2|F|T(57), {0x1f67, 0x03b9}}},
- {0x1fb2, {2|F|T(58), {0x1f70, 0x03b9}}},
- {0x1fb3, {2|F|T(59), {0x03b1, 0x03b9}}},
- {0x1fb4, {2|F|T(60), {0x03ac, 0x03b9}}},
+ {0x1f80, {2|F|T(6), {0x1f00, 0x03b9}}},
+ {0x1f81, {2|F|T(7), {0x1f01, 0x03b9}}},
+ {0x1f82, {2|F|T(8), {0x1f02, 0x03b9}}},
+ {0x1f83, {2|F|T(9), {0x1f03, 0x03b9}}},
+ {0x1f84, {2|F|T(10), {0x1f04, 0x03b9}}},
+ {0x1f85, {2|F|T(11), {0x1f05, 0x03b9}}},
+ {0x1f86, {2|F|T(12), {0x1f06, 0x03b9}}},
+ {0x1f87, {2|F|T(13), {0x1f07, 0x03b9}}},
+ {0x1f88, {2|F|T(6), {0x1f00, 0x03b9}}},
+ {0x1f89, {2|F|T(7), {0x1f01, 0x03b9}}},
+ {0x1f8a, {2|F|T(8), {0x1f02, 0x03b9}}},
+ {0x1f8b, {2|F|T(9), {0x1f03, 0x03b9}}},
+ {0x1f8c, {2|F|T(10), {0x1f04, 0x03b9}}},
+ {0x1f8d, {2|F|T(11), {0x1f05, 0x03b9}}},
+ {0x1f8e, {2|F|T(12), {0x1f06, 0x03b9}}},
+ {0x1f8f, {2|F|T(13), {0x1f07, 0x03b9}}},
+ {0x1f90, {2|F|T(14), {0x1f20, 0x03b9}}},
+ {0x1f91, {2|F|T(15), {0x1f21, 0x03b9}}},
+ {0x1f92, {2|F|T(16), {0x1f22, 0x03b9}}},
+ {0x1f93, {2|F|T(17), {0x1f23, 0x03b9}}},
+ {0x1f94, {2|F|T(18), {0x1f24, 0x03b9}}},
+ {0x1f95, {2|F|T(19), {0x1f25, 0x03b9}}},
+ {0x1f96, {2|F|T(20), {0x1f26, 0x03b9}}},
+ {0x1f97, {2|F|T(21), {0x1f27, 0x03b9}}},
+ {0x1f98, {2|F|T(14), {0x1f20, 0x03b9}}},
+ {0x1f99, {2|F|T(15), {0x1f21, 0x03b9}}},
+ {0x1f9a, {2|F|T(16), {0x1f22, 0x03b9}}},
+ {0x1f9b, {2|F|T(17), {0x1f23, 0x03b9}}},
+ {0x1f9c, {2|F|T(18), {0x1f24, 0x03b9}}},
+ {0x1f9d, {2|F|T(19), {0x1f25, 0x03b9}}},
+ {0x1f9e, {2|F|T(20), {0x1f26, 0x03b9}}},
+ {0x1f9f, {2|F|T(21), {0x1f27, 0x03b9}}},
+ {0x1fa0, {2|F|T(22), {0x1f60, 0x03b9}}},
+ {0x1fa1, {2|F|T(23), {0x1f61, 0x03b9}}},
+ {0x1fa2, {2|F|T(24), {0x1f62, 0x03b9}}},
+ {0x1fa3, {2|F|T(25), {0x1f63, 0x03b9}}},
+ {0x1fa4, {2|F|T(26), {0x1f64, 0x03b9}}},
+ {0x1fa5, {2|F|T(27), {0x1f65, 0x03b9}}},
+ {0x1fa6, {2|F|T(28), {0x1f66, 0x03b9}}},
+ {0x1fa7, {2|F|T(29), {0x1f67, 0x03b9}}},
+ {0x1fa8, {2|F|T(22), {0x1f60, 0x03b9}}},
+ {0x1fa9, {2|F|T(23), {0x1f61, 0x03b9}}},
+ {0x1faa, {2|F|T(24), {0x1f62, 0x03b9}}},
+ {0x1fab, {2|F|T(25), {0x1f63, 0x03b9}}},
+ {0x1fac, {2|F|T(26), {0x1f64, 0x03b9}}},
+ {0x1fad, {2|F|T(27), {0x1f65, 0x03b9}}},
+ {0x1fae, {2|F|T(28), {0x1f66, 0x03b9}}},
+ {0x1faf, {2|F|T(29), {0x1f67, 0x03b9}}},
+ {0x1fb2, {2|F|T(30), {0x1f70, 0x03b9}}},
+ {0x1fb3, {2|F|T(31), {0x03b1, 0x03b9}}},
+ {0x1fb4, {2|F|T(32), {0x03ac, 0x03b9}}},
{0x1fb6, {2|F, {0x03b1, 0x0342}}},
- {0x1fb7, {3|F|T(61), {0x03b1, 0x0342, 0x03b9}}},
+ {0x1fb7, {3|F|T(33), {0x03b1, 0x0342, 0x03b9}}},
{0x1fb8, {1|F|D, {0x1fb0}}},
{0x1fb9, {1|F|D, {0x1fb1}}},
{0x1fba, {1|F|D, {0x1f70}}},
{0x1fbb, {1|F|D, {0x1f71}}},
- {0x1fbc, {2|F|T(62), {0x03b1, 0x03b9}}},
+ {0x1fbc, {2|F|T(31), {0x03b1, 0x03b9}}},
{0x1fbe, {1|F, {0x03b9}}},
- {0x1fc2, {2|F|T(63), {0x1f74, 0x03b9}}},
- {0x1fc3, {2|F|T(64), {0x03b7, 0x03b9}}},
- {0x1fc4, {2|F|T(65), {0x03ae, 0x03b9}}},
+ {0x1fc2, {2|F|T(34), {0x1f74, 0x03b9}}},
+ {0x1fc3, {2|F|T(35), {0x03b7, 0x03b9}}},
+ {0x1fc4, {2|F|T(36), {0x03ae, 0x03b9}}},
{0x1fc6, {2|F, {0x03b7, 0x0342}}},
- {0x1fc7, {3|F|T(66), {0x03b7, 0x0342, 0x03b9}}},
+ {0x1fc7, {3|F|T(37), {0x03b7, 0x0342, 0x03b9}}},
{0x1fc8, {1|F|D, {0x1f72}}},
{0x1fc9, {1|F|D, {0x1f73}}},
{0x1fca, {1|F|D, {0x1f74}}},
{0x1fcb, {1|F|D, {0x1f75}}},
- {0x1fcc, {2|F|T(67), {0x03b7, 0x03b9}}},
+ {0x1fcc, {2|F|T(35), {0x03b7, 0x03b9}}},
{0x1fd2, {3|F, {0x03b9, 0x0308, 0x0300}}},
{0x1fd3, {3|F, {0x03b9, 0x0308, 0x0301}}},
{0x1fd6, {2|F, {0x03b9, 0x0342}}},
@@ -802,16 +802,16 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x1fea, {1|F|D, {0x1f7a}}},
{0x1feb, {1|F|D, {0x1f7b}}},
{0x1fec, {1|F|D, {0x1fe5}}},
- {0x1ff2, {2|F|T(68), {0x1f7c, 0x03b9}}},
- {0x1ff3, {2|F|T(69), {0x03c9, 0x03b9}}},
- {0x1ff4, {2|F|T(70), {0x03ce, 0x03b9}}},
+ {0x1ff2, {2|F|T(38), {0x1f7c, 0x03b9}}},
+ {0x1ff3, {2|F|T(39), {0x03c9, 0x03b9}}},
+ {0x1ff4, {2|F|T(40), {0x03ce, 0x03b9}}},
{0x1ff6, {2|F, {0x03c9, 0x0342}}},
- {0x1ff7, {3|F|T(71), {0x03c9, 0x0342, 0x03b9}}},
+ {0x1ff7, {3|F|T(41), {0x03c9, 0x0342, 0x03b9}}},
{0x1ff8, {1|F|D, {0x1f78}}},
{0x1ff9, {1|F|D, {0x1f79}}},
{0x1ffa, {1|F|D, {0x1f7c}}},
{0x1ffb, {1|F|D, {0x1f7d}}},
- {0x1ffc, {2|F|T(72), {0x03c9, 0x03b9}}},
+ {0x1ffc, {2|F|T(39), {0x03c9, 0x03b9}}},
{0x2126, {1|F|D, {0x03c9}}},
{0x212a, {1|F|D, {0x006b}}},
{0x212b, {1|F|D, {0x00e5}}},
@@ -1161,18 +1161,18 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0xabbd, {1|F|U, {0x13ed}}},
{0xabbe, {1|F|U, {0x13ee}}},
{0xabbf, {1|F|U, {0x13ef}}},
- {0xfb00, {2|F|T(73), {0x0066, 0x0066}}},
- {0xfb01, {2|F|T(74), {0x0066, 0x0069}}},
- {0xfb02, {2|F|T(75), {0x0066, 0x006c}}},
- {0xfb03, {3|F|T(76), {0x0066, 0x0066, 0x0069}}},
- {0xfb04, {3|F|T(77), {0x0066, 0x0066, 0x006c}}},
- {0xfb05, {2|F|T(78), {0x0073, 0x0074}}},
- {0xfb06, {2|F|T(79), {0x0073, 0x0074}}},
- {0xfb13, {2|F|T(80), {0x0574, 0x0576}}},
- {0xfb14, {2|F|T(81), {0x0574, 0x0565}}},
- {0xfb15, {2|F|T(82), {0x0574, 0x056b}}},
- {0xfb16, {2|F|T(83), {0x057e, 0x0576}}},
- {0xfb17, {2|F|T(84), {0x0574, 0x056d}}},
+ {0xfb00, {2|F|T(42), {0x0066, 0x0066}}},
+ {0xfb01, {2|F|T(43), {0x0066, 0x0069}}},
+ {0xfb02, {2|F|T(44), {0x0066, 0x006c}}},
+ {0xfb03, {3|F|T(45), {0x0066, 0x0066, 0x0069}}},
+ {0xfb04, {3|F|T(46), {0x0066, 0x0066, 0x006c}}},
+ {0xfb05, {2|F|T(47), {0x0073, 0x0074}}},
+ {0xfb06, {2|F|T(47), {0x0073, 0x0074}}},
+ {0xfb13, {2|F|T(48), {0x0574, 0x0576}}},
+ {0xfb14, {2|F|T(49), {0x0574, 0x0565}}},
+ {0xfb15, {2|F|T(50), {0x0574, 0x056b}}},
+ {0xfb16, {2|F|T(51), {0x057e, 0x0576}}},
+ {0xfb17, {2|F|T(52), {0x0574, 0x056d}}},
{0xff21, {1|F|D, {0xff41}}},
{0xff22, {1|F|D, {0xff42}}},
{0xff23, {1|F|D, {0xff43}}},
@@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01b9, {1|U, {0x01b8}}},
{0x01bd, {1|U, {0x01bc}}},
{0x01bf, {1|U, {0x01f7}}},
- {0x01c6, {2|U|T(85), {0x01c4, 0x01c5}}},
- {0x01c9, {2|U|T(86), {0x01c7, 0x01c8}}},
- {0x01cc, {2|U|T(87), {0x01ca, 0x01cb}}},
+ {0x01c6, {2|U|T(1), {0x01c4, 0x01c5}}},
+ {0x01c9, {2|U|T(2), {0x01c7, 0x01c8}}},
+ {0x01cc, {2|U|T(3), {0x01ca, 0x01cb}}},
{0x01ce, {1|U, {0x01cd}}},
{0x01d0, {1|U, {0x01cf}}},
{0x01d2, {1|U, {0x01d1}}},
@@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01eb, {1|U, {0x01ea}}},
{0x01ed, {1|U, {0x01ec}}},
{0x01ef, {1|U, {0x01ee}}},
- {0x01f3, {2|U|T(88), {0x01f1, 0x01f2}}},
+ {0x01f3, {2|U|T(4), {0x01f1, 0x01f2}}},
{0x01f5, {1|U, {0x01f4}}},
{0x01f9, {1|U, {0x01f8}}},
{0x01fb, {1|U, {0x01fa}}},
@@ -6249,12 +6249,8 @@ onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
CodePointList3 TitleCase[] = {
{2, {0x0053, 0x0073}},
{1, {0x01C5}},
- {1, {0x01C5}},
- {1, {0x01C8}},
{1, {0x01C8}},
{1, {0x01CB}},
- {1, {0x01CB}},
- {1, {0x01F2}},
{1, {0x01F2}},
{2, {0x0535, 0x0582}},
{1, {0x1F88}},
@@ -6265,22 +6261,6 @@ CodePointList3 TitleCase[] = {
{1, {0x1F8D}},
{1, {0x1F8E}},
{1, {0x1F8F}},
- {1, {0x1F88}},
- {1, {0x1F89}},
- {1, {0x1F8A}},
- {1, {0x1F8B}},
- {1, {0x1F8C}},
- {1, {0x1F8D}},
- {1, {0x1F8E}},
- {1, {0x1F8F}},
- {1, {0x1F98}},
- {1, {0x1F99}},
- {1, {0x1F9A}},
- {1, {0x1F9B}},
- {1, {0x1F9C}},
- {1, {0x1F9D}},
- {1, {0x1F9E}},
- {1, {0x1F9F}},
{1, {0x1F98}},
{1, {0x1F99}},
{1, {0x1F9A}},
@@ -6297,43 +6277,27 @@ CodePointList3 TitleCase[] = {
{1, {0x1FAD}},
{1, {0x1FAE}},
{1, {0x1FAF}},
- {1, {0x1FA8}},
- {1, {0x1FA9}},
- {1, {0x1FAA}},
- {1, {0x1FAB}},
- {1, {0x1FAC}},
- {1, {0x1FAD}},
- {1, {0x1FAE}},
- {1, {0x1FAF}},
{2, {0x1FBA, 0x0345}},
{1, {0x1FBC}},
{2, {0x0386, 0x0345}},
{3, {0x0391, 0x0342, 0x0345}},
- {1, {0x1FBC}},
{2, {0x1FCA, 0x0345}},
{1, {0x1FCC}},
{2, {0x0389, 0x0345}},
{3, {0x0397, 0x0342, 0x0345}},
- {1, {0x1FCC}},
{2, {0x1FFA, 0x0345}},
{1, {0x1FFC}},
{2, {0x038F, 0x0345}},
{3, {0x03A9, 0x0342, 0x0345}},
- {1, {0x1FFC}},
{2, {0x0046, 0x0066}},
{2, {0x0046, 0x0069}},
{2, {0x0046, 0x006C}},
{3, {0x0046, 0x0066, 0x0069}},
{3, {0x0046, 0x0066, 0x006C}},
{2, {0x0053, 0x0074}},
- {2, {0x0053, 0x0074}},
{2, {0x0544, 0x0576}},
{2, {0x0544, 0x0565}},
{2, {0x0544, 0x056B}},
{2, {0x054E, 0x0576}},
{2, {0x0544, 0x056D}},
- {1, {0x01C5}},
- {1, {0x01C8}},
- {1, {0x01CB}},
- {1, {0x01F2}},
};