summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 8
-rw-r--r-- enc/unicode.c 12
-rwxr-xr-x enc/unicode/case-folding.rb 41
-rw-r--r-- enc/unicode/casefold.h 445
4 files changed, 307 insertions, 199 deletions
diff --git a/ChangeLog b/ChangeLog
index f2eabfe237..e9bdac4db3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Fri Mar 11 16:11:27 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * enc/unicode/case-folding.rb, casefold.h: Streamlining approach to
+ case mapping data not available from case folding by unifying all
+ three cases (special title, special upper, special lower).
+ * enc/unicode.c: Adjust macro names for above (macros are currently inactive).
+ (with Kimihito Matsui)
+
Thu Mar 10 17:34:16 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* iseq.c (prepare_iseq_build): enable coverage by coverage_enabled
diff --git a/enc/unicode.c b/enc/unicode.c
index e9c2803cab..f5a7d9b984 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -140,14 +140,22 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
#define U ONIGENC_CASE_UPCASE
#define D ONIGENC_CASE_DOWNCASE
#define F ONIGENC_CASE_FOLD
-#define T(n) (ONIGENC_CASE_TITLECASE|OnigTitlecaseEncode(n))
+#define ST 0
+#define SU 0
+#define SL 0
+#define I(n) 0
+#define L(n) 0
#include "enc/unicode/casefold.h"
#undef U
#undef D
#undef F
-#undef T
+#undef ST
+#undef SU
+#undef SL
+#undef I
+#undef L
#include "enc/unicode/name2ctype.h"
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index cd14b6e6f1..d309dd6d15 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -177,7 +177,7 @@ class CaseFolding
dest.print lookup_hash(name, "CodePointList2", data)
# TitleCase
- dest.print mapping_data.titlecase_output
+ dest.print mapping_data.specials_output
end
def debug!
@@ -203,7 +203,8 @@ end
class CaseMapping
def initialize (mapping_directory)
@mappings = {}
- @titlecase = []
+ @specials = []
+ @specials_length = 0
IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line|
next if line =~ /^</
code, _1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11, upper, lower, title = line.chomp.split ';'
@@ -237,12 +238,24 @@ class CaseMapping
if item
flags += '|U' if to==item.upper
flags += '|D' if to==item.lower
+ specials_index = nil
+ specials = []
unless item.upper == item.title
- unless title_index = @titlecase.find_index { |i| i.title==item.title }
- title_index = @titlecase.length
- @titlecase << item
- end
- flags += "|T(#{title_index})"
+ specials << item.title
+ flags += "|ST"
+ end
+ unless item.lower.nil? or item.lower==from or item.lower==to
+ specials << item.lower
+ flags += "|SL"
+ end
+ unless item.upper.nil? or item.upper==from or item.upper==to
+ specials << item.upper
+ flags += "|SU"
+ end
+ if specials.first
+ flags += "|I(#{@specials_length})"
+ @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
+ @specials << specials
end
end
flags
@@ -252,12 +265,14 @@ class CaseMapping
@debug = true
end
- def titlecase_output
- "CodePointList3 TitleCase[] = {\n" +
- @titlecase.map do |item|
- chars = item.title.split(/ /)
- ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
- " {#{chars.length}, {#{chars.map {|c| "0x"+c }.join(', ')}#{ct}}},\n"
+ def specials_output
+ "OnigCodePoint CaseMappingSpecials[] = {\n" +
+ @specials.map do |sps|
+ ' ' + sps.map do |sp|
+ chars = sp.split(/ /)
+ ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
+ " L(#{chars.length})|#{chars.map {|c| "0x"+c }.join(', ')}#{ct},"
+ end.join + "\n"
end.join + "};\n"
end
diff --git a/enc/unicode/casefold.h b/enc/unicode/casefold.h
index 6e7140cf7f..fcc912035d 100644
--- a/enc/unicode/casefold.h
+++ b/enc/unicode/casefold.h
@@ -28,7 +28,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0058, {1|F|D, {0x0078}}},
{0x0059, {1|F|D, {0x0079}}},
{0x005a, {1|F|D, {0x007a}}},
- {0x00b5, {1|F, {0x03bc}}},
+ {0x00b5, {1|F|SU|I(0), {0x03bc}}},
{0x00c0, {1|F|D, {0x00e0}}},
{0x00c1, {1|F|D, {0x00e1}}},
{0x00c2, {1|F|D, {0x00e2}}},
@@ -59,7 +59,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x00dc, {1|F|D, {0x00fc}}},
{0x00dd, {1|F|D, {0x00fd}}},
{0x00de, {1|F|D, {0x00fe}}},
- {0x00df, {2|F|T(0), {0x0073, 0x0073}}},
+ {0x00df, {2|F|ST|SU|I(1), {0x0073, 0x0073}}},
{0x0100, {1|F|D, {0x0101}}},
{0x0102, {1|F|D, {0x0103}}},
{0x0104, {1|F|D, {0x0105}}},
@@ -95,7 +95,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0143, {1|F|D, {0x0144}}},
{0x0145, {1|F|D, {0x0146}}},
{0x0147, {1|F|D, {0x0148}}},
- {0x0149, {2|F, {0x02bc, 0x006e}}},
+ {0x0149, {2|F|SU|I(5), {0x02bc, 0x006e}}},
{0x014a, {1|F|D, {0x014b}}},
{0x014c, {1|F|D, {0x014d}}},
{0x014e, {1|F|D, {0x014f}}},
@@ -123,7 +123,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0179, {1|F|D, {0x017a}}},
{0x017b, {1|F|D, {0x017c}}},
{0x017d, {1|F|D, {0x017e}}},
- {0x017f, {1|F, {0x0073}}},
+ {0x017f, {1|F|SU|I(7), {0x0073}}},
{0x0181, {1|F|D, {0x0253}}},
{0x0182, {1|F|D, {0x0183}}},
{0x0184, {1|F|D, {0x0185}}},
@@ -160,12 +160,12 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x01b7, {1|F|D, {0x0292}}},
{0x01b8, {1|F|D, {0x01b9}}},
{0x01bc, {1|F|D, {0x01bd}}},
- {0x01c4, {1|F|D|T(1), {0x01c6}}},
- {0x01c5, {1|F|D|T(1), {0x01c6}}},
- {0x01c7, {1|F|D|T(2), {0x01c9}}},
- {0x01c8, {1|F|D|T(2), {0x01c9}}},
- {0x01ca, {1|F|D|T(3), {0x01cc}}},
- {0x01cb, {1|F|D|T(3), {0x01cc}}},
+ {0x01c4, {1|F|D|ST|I(8), {0x01c6}}},
+ {0x01c5, {1|F|D|ST|SU|I(9), {0x01c6}}},
+ {0x01c7, {1|F|D|ST|I(11), {0x01c9}}},
+ {0x01c8, {1|F|D|ST|SU|I(12), {0x01c9}}},
+ {0x01ca, {1|F|D|ST|I(14), {0x01cc}}},
+ {0x01cb, {1|F|D|ST|SU|I(15), {0x01cc}}},
{0x01cd, {1|F|D, {0x01ce}}},
{0x01cf, {1|F|D, {0x01d0}}},
{0x01d1, {1|F|D, {0x01d2}}},
@@ -183,9 +183,9 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x01ea, {1|F|D, {0x01eb}}},
{0x01ec, {1|F|D, {0x01ed}}},
{0x01ee, {1|F|D, {0x01ef}}},
- {0x01f0, {2|F, {0x006a, 0x030c}}},
- {0x01f1, {1|F|D|T(4), {0x01f3}}},
- {0x01f2, {1|F|D|T(4), {0x01f3}}},
+ {0x01f0, {2|F|SU|I(17), {0x006a, 0x030c}}},
+ {0x01f1, {1|F|D|ST|I(19), {0x01f3}}},
+ {0x01f2, {1|F|D|ST|SU|I(20), {0x01f3}}},
{0x01f4, {1|F|D, {0x01f5}}},
{0x01f6, {1|F|D, {0x0195}}},
{0x01f7, {1|F|D, {0x01bf}}},
@@ -232,7 +232,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x024a, {1|F|D, {0x024b}}},
{0x024c, {1|F|D, {0x024d}}},
{0x024e, {1|F|D, {0x024f}}},
- {0x0345, {1|F, {0x03b9}}},
+ {0x0345, {1|F|SU|I(22), {0x03b9}}},
{0x0370, {1|F|D, {0x0371}}},
{0x0372, {1|F|D, {0x0373}}},
{0x0376, {1|F|D, {0x0377}}},
@@ -244,7 +244,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x038c, {1|F|D, {0x03cc}}},
{0x038e, {1|F|D, {0x03cd}}},
{0x038f, {1|F|D, {0x03ce}}},
- {0x0390, {3|F, {0x03b9, 0x0308, 0x0301}}},
+ {0x0390, {3|F|SU|I(23), {0x03b9, 0x0308, 0x0301}}},
{0x0391, {1|F|D, {0x03b1}}},
{0x0392, {1|F|D, {0x03b2}}},
{0x0393, {1|F|D, {0x03b3}}},
@@ -271,13 +271,13 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x03a9, {1|F|D, {0x03c9}}},
{0x03aa, {1|F|D, {0x03ca}}},
{0x03ab, {1|F|D, {0x03cb}}},
- {0x03b0, {3|F, {0x03c5, 0x0308, 0x0301}}},
- {0x03c2, {1|F, {0x03c3}}},
+ {0x03b0, {3|F|SU|I(26), {0x03c5, 0x0308, 0x0301}}},
+ {0x03c2, {1|F|SU|I(29), {0x03c3}}},
{0x03cf, {1|F|D, {0x03d7}}},
- {0x03d0, {1|F, {0x03b2}}},
- {0x03d1, {1|F, {0x03b8}}},
- {0x03d5, {1|F, {0x03c6}}},
- {0x03d6, {1|F, {0x03c0}}},
+ {0x03d0, {1|F|SU|I(30), {0x03b2}}},
+ {0x03d1, {1|F|SU|I(31), {0x03b8}}},
+ {0x03d5, {1|F|SU|I(32), {0x03c6}}},
+ {0x03d6, {1|F|SU|I(33), {0x03c0}}},
{0x03d8, {1|F|D, {0x03d9}}},
{0x03da, {1|F|D, {0x03db}}},
{0x03dc, {1|F|D, {0x03dd}}},
@@ -290,10 +290,10 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x03ea, {1|F|D, {0x03eb}}},
{0x03ec, {1|F|D, {0x03ed}}},
{0x03ee, {1|F|D, {0x03ef}}},
- {0x03f0, {1|F, {0x03ba}}},
- {0x03f1, {1|F, {0x03c1}}},
+ {0x03f0, {1|F|SU|I(34), {0x03ba}}},
+ {0x03f1, {1|F|SU|I(35), {0x03c1}}},
{0x03f4, {1|F|D, {0x03b8}}},
- {0x03f5, {1|F, {0x03b5}}},
+ {0x03f5, {1|F|SU|I(36), {0x03b5}}},
{0x03f7, {1|F|D, {0x03f8}}},
{0x03f9, {1|F|D, {0x03f2}}},
{0x03fa, {1|F|D, {0x03fb}}},
@@ -486,7 +486,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0554, {1|F|D, {0x0584}}},
{0x0555, {1|F|D, {0x0585}}},
{0x0556, {1|F|D, {0x0586}}},
- {0x0587, {2|F|T(5), {0x0565, 0x0582}}},
+ {0x0587, {2|F|ST|SU|I(37), {0x0565, 0x0582}}},
{0x10a0, {1|F|D, {0x2d00}}},
{0x10a1, {1|F|D, {0x2d01}}},
{0x10a2, {1|F|D, {0x2d02}}},
@@ -608,13 +608,13 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x1e90, {1|F|D, {0x1e91}}},
{0x1e92, {1|F|D, {0x1e93}}},
{0x1e94, {1|F|D, {0x1e95}}},
- {0x1e96, {2|F, {0x0068, 0x0331}}},
- {0x1e97, {2|F, {0x0074, 0x0308}}},
- {0x1e98, {2|F, {0x0077, 0x030a}}},
- {0x1e99, {2|F, {0x0079, 0x030a}}},
- {0x1e9a, {2|F, {0x0061, 0x02be}}},
- {0x1e9b, {1|F, {0x1e61}}},
- {0x1e9e, {2|F, {0x0073, 0x0073}}},
+ {0x1e96, {2|F|SU|I(41), {0x0068, 0x0331}}},
+ {0x1e97, {2|F|SU|I(43), {0x0074, 0x0308}}},
+ {0x1e98, {2|F|SU|I(45), {0x0077, 0x030a}}},
+ {0x1e99, {2|F|SU|I(47), {0x0079, 0x030a}}},
+ {0x1e9a, {2|F|SU|I(49), {0x0061, 0x02be}}},
+ {0x1e9b, {1|F|SU|I(51), {0x1e61}}},
+ {0x1e9e, {2|F|SL|I(52), {0x0073, 0x0073}}},
{0x1ea0, {1|F|D, {0x1ea1}}},
{0x1ea2, {1|F|D, {0x1ea3}}},
{0x1ea4, {1|F|D, {0x1ea5}}},
@@ -699,10 +699,10 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x1f4b, {1|F|D, {0x1f43}}},
{0x1f4c, {1|F|D, {0x1f44}}},
{0x1f4d, {1|F|D, {0x1f45}}},
- {0x1f50, {2|F, {0x03c5, 0x0313}}},
- {0x1f52, {3|F, {0x03c5, 0x0313, 0x0300}}},
- {0x1f54, {3|F, {0x03c5, 0x0313, 0x0301}}},
- {0x1f56, {3|F, {0x03c5, 0x0313, 0x0342}}},
+ {0x1f50, {2|F|SU|I(53), {0x03c5, 0x0313}}},
+ {0x1f52, {3|F|SU|I(55), {0x03c5, 0x0313, 0x0300}}},
+ {0x1f54, {3|F|SU|I(58), {0x03c5, 0x0313, 0x0301}}},
+ {0x1f56, {3|F|SU|I(61), {0x03c5, 0x0313, 0x0342}}},
{0x1f59, {1|F|D, {0x1f51}}},
{0x1f5b, {1|F|D, {0x1f53}}},
{0x1f5d, {1|F|D, {0x1f55}}},
@@ -715,103 +715,103 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x1f6d, {1|F|D, {0x1f65}}},
{0x1f6e, {1|F|D, {0x1f66}}},
{0x1f6f, {1|F|D, {0x1f67}}},
- {0x1f80, {2|F|T(6), {0x1f00, 0x03b9}}},
- {0x1f81, {2|F|T(7), {0x1f01, 0x03b9}}},
- {0x1f82, {2|F|T(8), {0x1f02, 0x03b9}}},
- {0x1f83, {2|F|T(9), {0x1f03, 0x03b9}}},
- {0x1f84, {2|F|T(10), {0x1f04, 0x03b9}}},
- {0x1f85, {2|F|T(11), {0x1f05, 0x03b9}}},
- {0x1f86, {2|F|T(12), {0x1f06, 0x03b9}}},
- {0x1f87, {2|F|T(13), {0x1f07, 0x03b9}}},
- {0x1f88, {2|F|T(6), {0x1f00, 0x03b9}}},
- {0x1f89, {2|F|T(7), {0x1f01, 0x03b9}}},
- {0x1f8a, {2|F|T(8), {0x1f02, 0x03b9}}},
- {0x1f8b, {2|F|T(9), {0x1f03, 0x03b9}}},
- {0x1f8c, {2|F|T(10), {0x1f04, 0x03b9}}},
- {0x1f8d, {2|F|T(11), {0x1f05, 0x03b9}}},
- {0x1f8e, {2|F|T(12), {0x1f06, 0x03b9}}},
- {0x1f8f, {2|F|T(13), {0x1f07, 0x03b9}}},
- {0x1f90, {2|F|T(14), {0x1f20, 0x03b9}}},
- {0x1f91, {2|F|T(15), {0x1f21, 0x03b9}}},
- {0x1f92, {2|F|T(16), {0x1f22, 0x03b9}}},
- {0x1f93, {2|F|T(17), {0x1f23, 0x03b9}}},
- {0x1f94, {2|F|T(18), {0x1f24, 0x03b9}}},
- {0x1f95, {2|F|T(19), {0x1f25, 0x03b9}}},
- {0x1f96, {2|F|T(20), {0x1f26, 0x03b9}}},
- {0x1f97, {2|F|T(21), {0x1f27, 0x03b9}}},
- {0x1f98, {2|F|T(14), {0x1f20, 0x03b9}}},
- {0x1f99, {2|F|T(15), {0x1f21, 0x03b9}}},
- {0x1f9a, {2|F|T(16), {0x1f22, 0x03b9}}},
- {0x1f9b, {2|F|T(17), {0x1f23, 0x03b9}}},
- {0x1f9c, {2|F|T(18), {0x1f24, 0x03b9}}},
- {0x1f9d, {2|F|T(19), {0x1f25, 0x03b9}}},
- {0x1f9e, {2|F|T(20), {0x1f26, 0x03b9}}},
- {0x1f9f, {2|F|T(21), {0x1f27, 0x03b9}}},
- {0x1fa0, {2|F|T(22), {0x1f60, 0x03b9}}},
- {0x1fa1, {2|F|T(23), {0x1f61, 0x03b9}}},
- {0x1fa2, {2|F|T(24), {0x1f62, 0x03b9}}},
- {0x1fa3, {2|F|T(25), {0x1f63, 0x03b9}}},
- {0x1fa4, {2|F|T(26), {0x1f64, 0x03b9}}},
- {0x1fa5, {2|F|T(27), {0x1f65, 0x03b9}}},
- {0x1fa6, {2|F|T(28), {0x1f66, 0x03b9}}},
- {0x1fa7, {2|F|T(29), {0x1f67, 0x03b9}}},
- {0x1fa8, {2|F|T(22), {0x1f60, 0x03b9}}},
- {0x1fa9, {2|F|T(23), {0x1f61, 0x03b9}}},
- {0x1faa, {2|F|T(24), {0x1f62, 0x03b9}}},
- {0x1fab, {2|F|T(25), {0x1f63, 0x03b9}}},
- {0x1fac, {2|F|T(26), {0x1f64, 0x03b9}}},
- {0x1fad, {2|F|T(27), {0x1f65, 0x03b9}}},
- {0x1fae, {2|F|T(28), {0x1f66, 0x03b9}}},
- {0x1faf, {2|F|T(29), {0x1f67, 0x03b9}}},
- {0x1fb2, {2|F|T(30), {0x1f70, 0x03b9}}},
- {0x1fb3, {2|F|T(31), {0x03b1, 0x03b9}}},
- {0x1fb4, {2|F|T(32), {0x03ac, 0x03b9}}},
- {0x1fb6, {2|F, {0x03b1, 0x0342}}},
- {0x1fb7, {3|F|T(33), {0x03b1, 0x0342, 0x03b9}}},
+ {0x1f80, {2|F|ST|SU|I(64), {0x1f00, 0x03b9}}},
+ {0x1f81, {2|F|ST|SU|I(67), {0x1f01, 0x03b9}}},
+ {0x1f82, {2|F|ST|SU|I(70), {0x1f02, 0x03b9}}},
+ {0x1f83, {2|F|ST|SU|I(73), {0x1f03, 0x03b9}}},
+ {0x1f84, {2|F|ST|SU|I(76), {0x1f04, 0x03b9}}},
+ {0x1f85, {2|F|ST|SU|I(79), {0x1f05, 0x03b9}}},
+ {0x1f86, {2|F|ST|SU|I(82), {0x1f06, 0x03b9}}},
+ {0x1f87, {2|F|ST|SU|I(85), {0x1f07, 0x03b9}}},
+ {0x1f88, {2|F|ST|SL|SU|I(88), {0x1f00, 0x03b9}}},
+ {0x1f89, {2|F|ST|SL|SU|I(92), {0x1f01, 0x03b9}}},
+ {0x1f8a, {2|F|ST|SL|SU|I(96), {0x1f02, 0x03b9}}},
+ {0x1f8b, {2|F|ST|SL|SU|I(100), {0x1f03, 0x03b9}}},
+ {0x1f8c, {2|F|ST|SL|SU|I(104), {0x1f04, 0x03b9}}},
+ {0x1f8d, {2|F|ST|SL|SU|I(108), {0x1f05, 0x03b9}}},
+ {0x1f8e, {2|F|ST|SL|SU|I(112), {0x1f06, 0x03b9}}},
+ {0x1f8f, {2|F|ST|SL|SU|I(116), {0x1f07, 0x03b9}}},
+ {0x1f90, {2|F|ST|SU|I(120), {0x1f20, 0x03b9}}},
+ {0x1f91, {2|F|ST|SU|I(123), {0x1f21, 0x03b9}}},
+ {0x1f92, {2|F|ST|SU|I(126), {0x1f22, 0x03b9}}},
+ {0x1f93, {2|F|ST|SU|I(129), {0x1f23, 0x03b9}}},
+ {0x1f94, {2|F|ST|SU|I(132), {0x1f24, 0x03b9}}},
+ {0x1f95, {2|F|ST|SU|I(135), {0x1f25, 0x03b9}}},
+ {0x1f96, {2|F|ST|SU|I(138), {0x1f26, 0x03b9}}},
+ {0x1f97, {2|F|ST|SU|I(141), {0x1f27, 0x03b9}}},
+ {0x1f98, {2|F|ST|SL|SU|I(144), {0x1f20, 0x03b9}}},
+ {0x1f99, {2|F|ST|SL|SU|I(148), {0x1f21, 0x03b9}}},
+ {0x1f9a, {2|F|ST|SL|SU|I(152), {0x1f22, 0x03b9}}},
+ {0x1f9b, {2|F|ST|SL|SU|I(156), {0x1f23, 0x03b9}}},
+ {0x1f9c, {2|F|ST|SL|SU|I(160), {0x1f24, 0x03b9}}},
+ {0x1f9d, {2|F|ST|SL|SU|I(164), {0x1f25, 0x03b9}}},
+ {0x1f9e, {2|F|ST|SL|SU|I(168), {0x1f26, 0x03b9}}},
+ {0x1f9f, {2|F|ST|SL|SU|I(172), {0x1f27, 0x03b9}}},
+ {0x1fa0, {2|F|ST|SU|I(176), {0x1f60, 0x03b9}}},
+ {0x1fa1, {2|F|ST|SU|I(179), {0x1f61, 0x03b9}}},
+ {0x1fa2, {2|F|ST|SU|I(182), {0x1f62, 0x03b9}}},
+ {0x1fa3, {2|F|ST|SU|I(185), {0x1f63, 0x03b9}}},
+ {0x1fa4, {2|F|ST|SU|I(188), {0x1f64, 0x03b9}}},
+ {0x1fa5, {2|F|ST|SU|I(191), {0x1f65, 0x03b9}}},
+ {0x1fa6, {2|F|ST|SU|I(194), {0x1f66, 0x03b9}}},
+ {0x1fa7, {2|F|ST|SU|I(197), {0x1f67, 0x03b9}}},
+ {0x1fa8, {2|F|ST|SL|SU|I(200), {0x1f60, 0x03b9}}},
+ {0x1fa9, {2|F|ST|SL|SU|I(204), {0x1f61, 0x03b9}}},
+ {0x1faa, {2|F|ST|SL|SU|I(208), {0x1f62, 0x03b9}}},
+ {0x1fab, {2|F|ST|SL|SU|I(212), {0x1f63, 0x03b9}}},
+ {0x1fac, {2|F|ST|SL|SU|I(216), {0x1f64, 0x03b9}}},
+ {0x1fad, {2|F|ST|SL|SU|I(220), {0x1f65, 0x03b9}}},
+ {0x1fae, {2|F|ST|SL|SU|I(224), {0x1f66, 0x03b9}}},
+ {0x1faf, {2|F|ST|SL|SU|I(228), {0x1f67, 0x03b9}}},
+ {0x1fb2, {2|F|ST|SU|I(232), {0x1f70, 0x03b9}}},
+ {0x1fb3, {2|F|ST|SU|I(236), {0x03b1, 0x03b9}}},
+ {0x1fb4, {2|F|ST|SU|I(239), {0x03ac, 0x03b9}}},
+ {0x1fb6, {2|F|SU|I(243), {0x03b1, 0x0342}}},
+ {0x1fb7, {3|F|ST|SU|I(245), {0x03b1, 0x0342, 0x03b9}}},
{0x1fb8, {1|F|D, {0x1fb0}}},
{0x1fb9, {1|F|D, {0x1fb1}}},
{0x1fba, {1|F|D, {0x1f70}}},
{0x1fbb, {1|F|D, {0x1f71}}},
- {0x1fbc, {2|F|T(31), {0x03b1, 0x03b9}}},
- {0x1fbe, {1|F, {0x03b9}}},
- {0x1fc2, {2|F|T(34), {0x1f74, 0x03b9}}},
- {0x1fc3, {2|F|T(35), {0x03b7, 0x03b9}}},
- {0x1fc4, {2|F|T(36), {0x03ae, 0x03b9}}},
- {0x1fc6, {2|F, {0x03b7, 0x0342}}},
- {0x1fc7, {3|F|T(37), {0x03b7, 0x0342, 0x03b9}}},
+ {0x1fbc, {2|F|ST|SL|SU|I(251), {0x03b1, 0x03b9}}},
+ {0x1fbe, {1|F|SU|I(255), {0x03b9}}},
+ {0x1fc2, {2|F|ST|SU|I(256), {0x1f74, 0x03b9}}},
+ {0x1fc3, {2|F|ST|SU|I(260), {0x03b7, 0x03b9}}},
+ {0x1fc4, {2|F|ST|SU|I(263), {0x03ae, 0x03b9}}},
+ {0x1fc6, {2|F|SU|I(267), {0x03b7, 0x0342}}},
+ {0x1fc7, {3|F|ST|SU|I(269), {0x03b7, 0x0342, 0x03b9}}},
{0x1fc8, {1|F|D, {0x1f72}}},
{0x1fc9, {1|F|D, {0x1f73}}},
{0x1fca, {1|F|D, {0x1f74}}},
{0x1fcb, {1|F|D, {0x1f75}}},
- {0x1fcc, {2|F|T(35), {0x03b7, 0x03b9}}},
- {0x1fd2, {3|F, {0x03b9, 0x0308, 0x0300}}},
- {0x1fd3, {3|F, {0x03b9, 0x0308, 0x0301}}},
- {0x1fd6, {2|F, {0x03b9, 0x0342}}},
- {0x1fd7, {3|F, {0x03b9, 0x0308, 0x0342}}},
+ {0x1fcc, {2|F|ST|SL|SU|I(275), {0x03b7, 0x03b9}}},
+ {0x1fd2, {3|F|SU|I(279), {0x03b9, 0x0308, 0x0300}}},
+ {0x1fd3, {3|F|SU|I(282), {0x03b9, 0x0308, 0x0301}}},
+ {0x1fd6, {2|F|SU|I(285), {0x03b9, 0x0342}}},
+ {0x1fd7, {3|F|SU|I(287), {0x03b9, 0x0308, 0x0342}}},
{0x1fd8, {1|F|D, {0x1fd0}}},
{0x1fd9, {1|F|D, {0x1fd1}}},
{0x1fda, {1|F|D, {0x1f76}}},
{0x1fdb, {1|F|D, {0x1f77}}},
- {0x1fe2, {3|F, {0x03c5, 0x0308, 0x0300}}},
- {0x1fe3, {3|F, {0x03c5, 0x0308, 0x0301}}},
- {0x1fe4, {2|F, {0x03c1, 0x0313}}},
- {0x1fe6, {2|F, {0x03c5, 0x0342}}},
- {0x1fe7, {3|F, {0x03c5, 0x0308, 0x0342}}},
+ {0x1fe2, {3|F|SU|I(290), {0x03c5, 0x0308, 0x0300}}},
+ {0x1fe3, {3|F|SU|I(293), {0x03c5, 0x0308, 0x0301}}},
+ {0x1fe4, {2|F|SU|I(296), {0x03c1, 0x0313}}},
+ {0x1fe6, {2|F|SU|I(298), {0x03c5, 0x0342}}},
+ {0x1fe7, {3|F|SU|I(300), {0x03c5, 0x0308, 0x0342}}},
{0x1fe8, {1|F|D, {0x1fe0}}},
{0x1fe9, {1|F|D, {0x1fe1}}},
{0x1fea, {1|F|D, {0x1f7a}}},
{0x1feb, {1|F|D, {0x1f7b}}},
{0x1fec, {1|F|D, {0x1fe5}}},
- {0x1ff2, {2|F|T(38), {0x1f7c, 0x03b9}}},
- {0x1ff3, {2|F|T(39), {0x03c9, 0x03b9}}},
- {0x1ff4, {2|F|T(40), {0x03ce, 0x03b9}}},
- {0x1ff6, {2|F, {0x03c9, 0x0342}}},
- {0x1ff7, {3|F|T(41), {0x03c9, 0x0342, 0x03b9}}},
+ {0x1ff2, {2|F|ST|SU|I(303), {0x1f7c, 0x03b9}}},
+ {0x1ff3, {2|F|ST|SU|I(307), {0x03c9, 0x03b9}}},
+ {0x1ff4, {2|F|ST|SU|I(310), {0x03ce, 0x03b9}}},
+ {0x1ff6, {2|F|SU|I(314), {0x03c9, 0x0342}}},
+ {0x1ff7, {3|F|ST|SU|I(316), {0x03c9, 0x0342, 0x03b9}}},
{0x1ff8, {1|F|D, {0x1f78}}},
{0x1ff9, {1|F|D, {0x1f79}}},
{0x1ffa, {1|F|D, {0x1f7c}}},
{0x1ffb, {1|F|D, {0x1f7d}}},
- {0x1ffc, {2|F|T(39), {0x03c9, 0x03b9}}},
+ {0x1ffc, {2|F|ST|SL|SU|I(322), {0x03c9, 0x03b9}}},
{0x2126, {1|F|D, {0x03c9}}},
{0x212a, {1|F|D, {0x006b}}},
{0x212b, {1|F|D, {0x00e5}}},
@@ -1161,18 +1161,18 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0xabbd, {1|F|U, {0x13ed}}},
{0xabbe, {1|F|U, {0x13ee}}},
{0xabbf, {1|F|U, {0x13ef}}},
- {0xfb00, {2|F|T(42), {0x0066, 0x0066}}},
- {0xfb01, {2|F|T(43), {0x0066, 0x0069}}},
- {0xfb02, {2|F|T(44), {0x0066, 0x006c}}},
- {0xfb03, {3|F|T(45), {0x0066, 0x0066, 0x0069}}},
- {0xfb04, {3|F|T(46), {0x0066, 0x0066, 0x006c}}},
- {0xfb05, {2|F|T(47), {0x0073, 0x0074}}},
- {0xfb06, {2|F|T(47), {0x0073, 0x0074}}},
- {0xfb13, {2|F|T(48), {0x0574, 0x0576}}},
- {0xfb14, {2|F|T(49), {0x0574, 0x0565}}},
- {0xfb15, {2|F|T(50), {0x0574, 0x056b}}},
- {0xfb16, {2|F|T(51), {0x057e, 0x0576}}},
- {0xfb17, {2|F|T(52), {0x0574, 0x056d}}},
+ {0xfb00, {2|F|ST|SU|I(326), {0x0066, 0x0066}}},
+ {0xfb01, {2|F|ST|SU|I(330), {0x0066, 0x0069}}},
+ {0xfb02, {2|F|ST|SU|I(334), {0x0066, 0x006c}}},
+ {0xfb03, {3|F|ST|SU|I(338), {0x0066, 0x0066, 0x0069}}},
+ {0xfb04, {3|F|ST|SU|I(344), {0x0066, 0x0066, 0x006c}}},
+ {0xfb05, {2|F|ST|SU|I(350), {0x0073, 0x0074}}},
+ {0xfb06, {2|F|ST|SU|I(354), {0x0073, 0x0074}}},
+ {0xfb13, {2|F|ST|SU|I(358), {0x0574, 0x0576}}},
+ {0xfb14, {2|F|ST|SU|I(362), {0x0574, 0x0565}}},
+ {0xfb15, {2|F|ST|SU|I(366), {0x0574, 0x056b}}},
+ {0xfb16, {2|F|ST|SU|I(370), {0x057e, 0x0576}}},
+ {0xfb17, {2|F|ST|SU|I(374), {0x0574, 0x056d}}},
{0xff21, {1|F|D, {0xff41}}},
{0xff22, {1|F|D, {0xff42}}},
{0xff23, {1|F|D, {0xff43}}},
@@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01b9, {1|U, {0x01b8}}},
{0x01bd, {1|U, {0x01bc}}},
{0x01bf, {1|U, {0x01f7}}},
- {0x01c6, {2|U|T(1), {0x01c4, 0x01c5}}},
- {0x01c9, {2|U|T(2), {0x01c7, 0x01c8}}},
- {0x01cc, {2|U|T(3), {0x01ca, 0x01cb}}},
+ {0x01c6, {2|U|ST|I(378), {0x01c4, 0x01c5}}},
+ {0x01c9, {2|U|ST|I(379), {0x01c7, 0x01c8}}},
+ {0x01cc, {2|U|ST|I(380), {0x01ca, 0x01cb}}},
{0x01ce, {1|U, {0x01cd}}},
{0x01d0, {1|U, {0x01cf}}},
{0x01d2, {1|U, {0x01d1}}},
@@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01eb, {1|U, {0x01ea}}},
{0x01ed, {1|U, {0x01ec}}},
{0x01ef, {1|U, {0x01ee}}},
- {0x01f3, {2|U|T(4), {0x01f1, 0x01f2}}},
+ {0x01f3, {2|U|ST|I(381), {0x01f1, 0x01f2}}},
{0x01f5, {1|U, {0x01f4}}},
{0x01f9, {1|U, {0x01f8}}},
{0x01fb, {1|U, {0x01fa}}},
@@ -3412,10 +3412,10 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x03b6, {1|U, {0x0396}}},
{0x03b7, {1|U, {0x0397}}},
{0x03b8, {3|U, {0x0398, 0x03d1, 0x03f4}}},
- {0x03b9, {3, {0x0345, 0x0399, 0x1fbe}}},
+ {0x03b9, {3|SU|I(382), {0x0345, 0x0399, 0x1fbe}}},
{0x03ba, {2|U, {0x039a, 0x03f0}}},
{0x03bb, {1|U, {0x039b}}},
- {0x03bc, {2, {0x00b5, 0x039c}}},
+ {0x03bc, {2|SU|I(383), {0x00b5, 0x039c}}},
{0x03bd, {1|U, {0x039d}}},
{0x03be, {1|U, {0x039e}}},
{0x03bf, {1|U, {0x039f}}},
@@ -6246,58 +6246,135 @@ onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
return 0;
}
-CodePointList3 TitleCase[] = {
- {2, {0x0053, 0x0073}},
- {1, {0x01C5}},
- {1, {0x01C8}},
- {1, {0x01CB}},
- {1, {0x01F2}},
- {2, {0x0535, 0x0582}},
- {1, {0x1F88}},
- {1, {0x1F89}},
- {1, {0x1F8A}},
- {1, {0x1F8B}},
- {1, {0x1F8C}},
- {1, {0x1F8D}},
- {1, {0x1F8E}},
- {1, {0x1F8F}},
- {1, {0x1F98}},
- {1, {0x1F99}},
- {1, {0x1F9A}},
- {1, {0x1F9B}},
- {1, {0x1F9C}},
- {1, {0x1F9D}},
- {1, {0x1F9E}},
- {1, {0x1F9F}},
- {1, {0x1FA8}},
- {1, {0x1FA9}},
- {1, {0x1FAA}},
- {1, {0x1FAB}},
- {1, {0x1FAC}},
- {1, {0x1FAD}},
- {1, {0x1FAE}},
- {1, {0x1FAF}},
- {2, {0x1FBA, 0x0345}},
- {1, {0x1FBC}},
- {2, {0x0386, 0x0345}},
- {3, {0x0391, 0x0342, 0x0345}},
- {2, {0x1FCA, 0x0345}},
- {1, {0x1FCC}},
- {2, {0x0389, 0x0345}},
- {3, {0x0397, 0x0342, 0x0345}},
- {2, {0x1FFA, 0x0345}},
- {1, {0x1FFC}},
- {2, {0x038F, 0x0345}},
- {3, {0x03A9, 0x0342, 0x0345}},
- {2, {0x0046, 0x0066}},
- {2, {0x0046, 0x0069}},
- {2, {0x0046, 0x006C}},
- {3, {0x0046, 0x0066, 0x0069}},
- {3, {0x0046, 0x0066, 0x006C}},
- {2, {0x0053, 0x0074}},
- {2, {0x0544, 0x0576}},
- {2, {0x0544, 0x0565}},
- {2, {0x0544, 0x056B}},
- {2, {0x054E, 0x0576}},
- {2, {0x0544, 0x056D}},
+OnigCodePoint CaseMappingSpecials[] = {
+ L(1)|0x039C,
+ L(2)|0x0053, 0x0073, L(2)|0x0053, 0x0053,
+ L(2)|0x02BC, 0x004E,
+ L(1)|0x0053,
+ L(1)|0x01C5,
+ L(1)|0x01C5, L(1)|0x01C4,
+ L(1)|0x01C8,
+ L(1)|0x01C8, L(1)|0x01C7,
+ L(1)|0x01CB,
+ L(1)|0x01CB, L(1)|0x01CA,
+ L(2)|0x004A, 0x030C,
+ L(1)|0x01F2,
+ L(1)|0x01F2, L(1)|0x01F1,
+ L(1)|0x0399,
+ L(3)|0x0399, 0x0308, 0x0301,
+ L(3)|0x03A5, 0x0308, 0x0301,
+ L(1)|0x03A3,
+ L(1)|0x0392,
+ L(1)|0x0398,
+ L(1)|0x03A6,
+ L(1)|0x03A0,
+ L(1)|0x039A,
+ L(1)|0x03A1,
+ L(1)|0x0395,
+ L(2)|0x0535, 0x0582, L(2)|0x0535, 0x0552,
+ L(2)|0x0048, 0x0331,
+ L(2)|0x0054, 0x0308,
+ L(2)|0x0057, 0x030A,
+ L(2)|0x0059, 0x030A,
+ L(2)|0x0041, 0x02BE,
+ L(1)|0x1E60,
+ L(1)|0x00DF,
+ L(2)|0x03A5, 0x0313,
+ L(3)|0x03A5, 0x0313, 0x0300,
+ L(3)|0x03A5, 0x0313, 0x0301,
+ L(3)|0x03A5, 0x0313, 0x0342,
+ L(1)|0x1F88, L(2)|0x1F08, 0x0399,
+ L(1)|0x1F89, L(2)|0x1F09, 0x0399,
+ L(1)|0x1F8A, L(2)|0x1F0A, 0x0399,
+ L(1)|0x1F8B, L(2)|0x1F0B, 0x0399,
+ L(1)|0x1F8C, L(2)|0x1F0C, 0x0399,
+ L(1)|0x1F8D, L(2)|0x1F0D, 0x0399,
+ L(1)|0x1F8E, L(2)|0x1F0E, 0x0399,
+ L(1)|0x1F8F, L(2)|0x1F0F, 0x0399,
+ L(1)|0x1F88, L(1)|0x1F80, L(2)|0x1F08, 0x0399,
+ L(1)|0x1F89, L(1)|0x1F81, L(2)|0x1F09, 0x0399,
+ L(1)|0x1F8A, L(1)|0x1F82, L(2)|0x1F0A, 0x0399,
+ L(1)|0x1F8B, L(1)|0x1F83, L(2)|0x1F0B, 0x0399,
+ L(1)|0x1F8C, L(1)|0x1F84, L(2)|0x1F0C, 0x0399,
+ L(1)|0x1F8D, L(1)|0x1F85, L(2)|0x1F0D, 0x0399,
+ L(1)|0x1F8E, L(1)|0x1F86, L(2)|0x1F0E, 0x0399,
+ L(1)|0x1F8F, L(1)|0x1F87, L(2)|0x1F0F, 0x0399,
+ L(1)|0x1F98, L(2)|0x1F28, 0x0399,
+ L(1)|0x1F99, L(2)|0x1F29, 0x0399,
+ L(1)|0x1F9A, L(2)|0x1F2A, 0x0399,
+ L(1)|0x1F9B, L(2)|0x1F2B, 0x0399,
+ L(1)|0x1F9C, L(2)|0x1F2C, 0x0399,
+ L(1)|0x1F9D, L(2)|0x1F2D, 0x0399,
+ L(1)|0x1F9E, L(2)|0x1F2E, 0x0399,
+ L(1)|0x1F9F, L(2)|0x1F2F, 0x0399,
+ L(1)|0x1F98, L(1)|0x1F90, L(2)|0x1F28, 0x0399,
+ L(1)|0x1F99, L(1)|0x1F91, L(2)|0x1F29, 0x0399,
+ L(1)|0x1F9A, L(1)|0x1F92, L(2)|0x1F2A, 0x0399,
+ L(1)|0x1F9B, L(1)|0x1F93, L(2)|0x1F2B, 0x0399,
+ L(1)|0x1F9C, L(1)|0x1F94, L(2)|0x1F2C, 0x0399,
+ L(1)|0x1F9D, L(1)|0x1F95, L(2)|0x1F2D, 0x0399,
+ L(1)|0x1F9E, L(1)|0x1F96, L(2)|0x1F2E, 0x0399,
+ L(1)|0x1F9F, L(1)|0x1F97, L(2)|0x1F2F, 0x0399,
+ L(1)|0x1FA8, L(2)|0x1F68, 0x0399,
+ L(1)|0x1FA9, L(2)|0x1F69, 0x0399,
+ L(1)|0x1FAA, L(2)|0x1F6A, 0x0399,
+ L(1)|0x1FAB, L(2)|0x1F6B, 0x0399,
+ L(1)|0x1FAC, L(2)|0x1F6C, 0x0399,
+ L(1)|0x1FAD, L(2)|0x1F6D, 0x0399,
+ L(1)|0x1FAE, L(2)|0x1F6E, 0x0399,
+ L(1)|0x1FAF, L(2)|0x1F6F, 0x0399,
+ L(1)|0x1FA8, L(1)|0x1FA0, L(2)|0x1F68, 0x0399,
+ L(1)|0x1FA9, L(1)|0x1FA1, L(2)|0x1F69, 0x0399,
+ L(1)|0x1FAA, L(1)|0x1FA2, L(2)|0x1F6A, 0x0399,
+ L(1)|0x1FAB, L(1)|0x1FA3, L(2)|0x1F6B, 0x0399,
+ L(1)|0x1FAC, L(1)|0x1FA4, L(2)|0x1F6C, 0x0399,
+ L(1)|0x1FAD, L(1)|0x1FA5, L(2)|0x1F6D, 0x0399,
+ L(1)|0x1FAE, L(1)|0x1FA6, L(2)|0x1F6E, 0x0399,
+ L(1)|0x1FAF, L(1)|0x1FA7, L(2)|0x1F6F, 0x0399,
+ L(2)|0x1FBA, 0x0345, L(2)|0x1FBA, 0x0399,
+ L(1)|0x1FBC, L(2)|0x0391, 0x0399,
+ L(2)|0x0386, 0x0345, L(2)|0x0386, 0x0399,
+ L(2)|0x0391, 0x0342,
+ L(3)|0x0391, 0x0342, 0x0345, L(3)|0x0391, 0x0342, 0x0399,
+ L(1)|0x1FBC, L(1)|0x1FB3, L(2)|0x0391, 0x0399,
+ L(1)|0x0399,
+ L(2)|0x1FCA, 0x0345, L(2)|0x1FCA, 0x0399,
+ L(1)|0x1FCC, L(2)|0x0397, 0x0399,
+ L(2)|0x0389, 0x0345, L(2)|0x0389, 0x0399,
+ L(2)|0x0397, 0x0342,
+ L(3)|0x0397, 0x0342, 0x0345, L(3)|0x0397, 0x0342, 0x0399,
+ L(1)|0x1FCC, L(1)|0x1FC3, L(2)|0x0397, 0x0399,
+ L(3)|0x0399, 0x0308, 0x0300,
+ L(3)|0x0399, 0x0308, 0x0301,
+ L(2)|0x0399, 0x0342,
+ L(3)|0x0399, 0x0308, 0x0342,
+ L(3)|0x03A5, 0x0308, 0x0300,
+ L(3)|0x03A5, 0x0308, 0x0301,
+ L(2)|0x03A1, 0x0313,
+ L(2)|0x03A5, 0x0342,
+ L(3)|0x03A5, 0x0308, 0x0342,
+ L(2)|0x1FFA, 0x0345, L(2)|0x1FFA, 0x0399,
+ L(1)|0x1FFC, L(2)|0x03A9, 0x0399,
+ L(2)|0x038F, 0x0345, L(2)|0x038F, 0x0399,
+ L(2)|0x03A9, 0x0342,
+ L(3)|0x03A9, 0x0342, 0x0345, L(3)|0x03A9, 0x0342, 0x0399,
+ L(1)|0x1FFC, L(1)|0x1FF3, L(2)|0x03A9, 0x0399,
+ L(2)|0x0046, 0x0066, L(2)|0x0046, 0x0046,
+ L(2)|0x0046, 0x0069, L(2)|0x0046, 0x0049,
+ L(2)|0x0046, 0x006C, L(2)|0x0046, 0x004C,
+ L(3)|0x0046, 0x0066, 0x0069, L(3)|0x0046, 0x0046, 0x0049,
+ L(3)|0x0046, 0x0066, 0x006C, L(3)|0x0046, 0x0046, 0x004C,
+ L(2)|0x0053, 0x0074, L(2)|0x0053, 0x0054,
+ L(2)|0x0053, 0x0074, L(2)|0x0053, 0x0054,
+ L(2)|0x0544, 0x0576, L(2)|0x0544, 0x0546,
+ L(2)|0x0544, 0x0565, L(2)|0x0544, 0x0535,
+ L(2)|0x0544, 0x056B, L(2)|0x0544, 0x053B,
+ L(2)|0x054E, 0x0576, L(2)|0x054E, 0x0546,
+ L(2)|0x0544, 0x056D, L(2)|0x0544, 0x053D,
+ L(1)|0x01C5,
+ L(1)|0x01C8,
+ L(1)|0x01CB,
+ L(1)|0x01F2,
+ L(1)|0x0399,
+ L(1)|0x039C,
};