1 files changed, 734 insertions, 255 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/nfkc.rb b/storage/mroonga/vendor/groonga/lib/nfkc.rb
index 1a134384c80..9ad25bba9ff 100755
--- a/storage/mroonga/vendor/groonga/lib/nfkc.rb
+++ b/storage/mroonga/vendor/groonga/lib/nfkc.rb
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 # -*- coding: utf-8 -*-
 #
-# Copyright(C) 2010 Brazil
+# Copyright(C) 2010-2016 Brazil
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -16,80 +16,701 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
-$KCODE = 'u'
-
 CUSTOM_RULE_PATH = 'nfkc-custom-rules.txt'
 
-def gen_bc(file, hash, level)
-  bl = ' ' * (level * 2)
-  h2 = {}
-  hash.each{|key,val|
-    head = key[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
+class SwitchGenerator
+  def initialize(unicode_version, output)
+    @unicode_version = unicode_version
+    @output = output
+  end
+
+  def generate(bc, decompose_map, compose_map)
+    STDERR.puts('generating char type code..')
+    generate_blockcode_char_type(bc)
+    STDERR.puts('generating decompose code..')
+    generate_decompose(decompose_map)
+    STDERR.puts('generating compose code..')
+    generate_compose(compose_map)
+  end
+
+  private
+  def generate_blockcode_char_type(bc)
+    @output.puts(<<-HEADER)
+
+grn_char_type
+grn_nfkc#{@unicode_version}_char_type(const unsigned char *str)
+{
+    HEADER
+
+    @lv = 0
+    gen_bc(bc, 0)
+
+    @output.puts(<<-FOOTER)
+  return -1;
+}
+    FOOTER
+  end
+
+  def gen_bc(hash, level)
+    bl = ' ' * (level * 2)
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
+      end
+    }
+    if h2.size < 3
+      h2.keys.sort.each{|k|
+        if (0x80 < k)
+          @output.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{@lv}; }\n", k)
+        end
+        h = h2[k]
+        if h.keys.join =~ /^\x80*$/n
+          @lv, = h.values
+        else
+          @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
+          gen_bc(h, level + 1)
+          @output.puts bl + '}'
+        end
+      }
+      @output.puts bl + "return #{@lv};"
     else
-      h2[head] = {rest => val}
+      @output.puts bl + "switch (str[#{level}]) {"
+      lk = 0x80
+      br = true
+      h2.keys.sort.each{|k|
+        if (lk < k)
+          for j in lk..k-1
+            @output.printf("#{bl}case 0x%02X :\n", j)
+          end
+          br = false
+        end
+        unless br
+          @output.puts bl + "  return #{@lv};"
+          @output.puts bl + '  break;'
+        end
+        h = h2[k]
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        if h.keys.join =~ /^\x80*$/n
+          @lv, = h.values
+          br = false
+        else
+          gen_bc(h, level + 1)
+          @output.puts bl + '  break;'
+          br = true
+        end
+        lk = k + 1
+      }
+      @output.puts bl + 'default :'
+      @output.puts bl + "  return #{@lv};"
+      @output.puts bl + '  break;'
+      @output.puts bl + '}'
     end
-  }
-  if h2.size < 3
-    h2.keys.sort.each{|k|
-      if (0x80 < k)
-        file.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{$lv}; }\n", k)
+  end
+
+  def generate_decompose(hash)
+    @output.puts(<<-HEADER)
+
+const char *
+grn_nfkc#{@unicode_version}_decompose(const unsigned char *str)
+{
+    HEADER
+
+    gen_decompose(hash, 0)
+
+    @output.puts(<<-FOOTER)
+  return 0;
+}
+    FOOTER
+  end
+
+  def gen_decompose(hash, level)
+    bl = ' ' * ((level + 0) * 2)
+    if hash['']
+      dst = ''
+      hash[''].each_byte{|b| dst << format('\x%02X', b)}
+      @output.puts "#{bl}return \"#{dst}\";"
+      hash.delete('')
+    end
+    return if hash.empty?
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
       end
-      h = h2[k]
-      if h.keys.join =~ /^\x80*$/
-        $lv, = h.values
+    }
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
+        gen_decompose(val, level + 1)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (str[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_decompose(h2[k], level + 1)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
+  end
+
+  def generate_compose(compose_map)
+    @output.puts(<<-HEADER)
+
+const char *
+grn_nfkc#{@unicode_version}_compose(const unsigned char *prefix, const unsigned char *suffix)
+{
+    HEADER
+    suffix = {}
+    compose_map.each{|src,dst|
+      chars = src.chars
+      if chars.size != 2
+        STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
+      end
+      s = chars.pop
+      if suffix[s]
+        suffix[s][chars.join] = dst
       else
-        file.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
-        gen_bc(file, h, level + 1)
-        file.puts bl + '}'
+        suffix[s] = {chars.join=>dst}
       end
     }
-    file.puts bl + "return #{$lv};"
-  else
-    file.puts bl + "switch (str[#{level}]) {"
-    lk = 0x80
-    br = true
-    h2.keys.sort.each{|k|
-      if (lk < k)
-        for j in lk..k-1
-          file.printf("#{bl}case 0x%02X :\n", j)
+    gen_compose_sub(suffix, 0)
+    @output.puts(<<-FOOTER)
+  return 0;
+}
+    FOOTER
+  end
+
+  def gen_compose_sub2(hash, level, indent)
+    bl = ' ' * ((level + indent + 0) * 2)
+    if hash['']
+      @output.print "#{bl}return \""
+      hash[''].each_byte{|b| @output.printf('\x%02X', b)}
+      @output.puts "\";"
+      hash.delete('')
+    end
+    return if hash.empty?
+
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
+      end
+    }
+
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
+        gen_compose_sub2(val, level + 1, indent)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (prefix[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_compose_sub2(h2[k], level + 1, indent)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
+  end
+
+  def gen_compose_sub(hash, level)
+    bl = ' ' * ((level + 0) * 2)
+    if hash['']
+      gen_compose_sub2(hash[''], 0, level)
+      hash.delete('')
+    end
+    return if hash.empty?
+    h2 = {}
+    hash.each{|key,val|
+      key = key.dup
+      key.force_encoding("ASCII-8BIT")
+      head = key.bytes[0]
+      rest = key[1..-1]
+      if h2[head]
+        h2[head][rest] = val
+      else
+        h2[head] = {rest => val}
+      end
+    }
+    if h2.size == 1
+      h2.each{|key,val|
+        @output.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
+        gen_compose_sub(val, level + 1)
+        @output.puts bl + '}'
+      }
+    else
+      @output.puts "#{bl}switch (suffix[#{level}]) {"
+      h2.keys.sort.each{|k|
+        @output.printf("#{bl}case 0x%02X :\n", k)
+        gen_compose_sub(h2[k], level + 1)
+        @output.puts("#{bl}  break;")
+      }
+      @output.puts bl + '}'
+    end
+  end
+end
+
+class TableGenerator < SwitchGenerator
+  private
+  def name_prefix
+    "grn_nfkc#{@unicode_version}_"
+  end
+
+  def table_name(type, common_bytes)
+    suffix = common_bytes.collect {|byte| "%02x" % byte}.join("")
+    "#{name_prefix}#{type}_table_#{suffix}"
+  end
+
+  def function_name(type)
+    "#{name_prefix}#{type}"
+  end
+
+  def generate_char_convert_tables(type, return_type, byte_size_groups)
+    if return_type.end_with?("*")
+      space = ""
+    else
+      space = " "
+    end
+    byte_size_groups.keys.sort.each do |common_bytes|
+      chars = byte_size_groups[common_bytes]
+      lines = []
+      all_values = []
+      last_bytes = chars.collect {|char| char.bytes.last}
+      last_bytes.min.step(last_bytes.max).each_slice(8) do |slice|
+        values = slice.collect do |last_byte|
+          char = (common_bytes + [last_byte]).pack("c*")
+          char.force_encoding("UTF-8")
+          yield(char)
+        end
+        all_values.concat(values)
+        lines << ("  " + values.join(", "))
+      end
+
+      next if all_values.uniq.size == 1
+
+      @output.puts(<<-TABLE_HEADER)
+
+static #{return_type}#{space}#{table_name(type, common_bytes)}[] = {
+      TABLE_HEADER
+      @output.puts(lines.join(",\n"))
+      @output.puts(<<-TABLE_FOOTER)
+};
+      TABLE_FOOTER
+    end
+  end
+
+  def generate_char_convert_function(type,
+                                     argument_list,
+                                     char_variable,
+                                     default,
+                                     return_type,
+                                     byte_size_groups,
+                                     options={})
+    modifier = options[:internal] ? "static inline " : ""
+    @output.puts(<<-HEADER)
+
+#{modifier}#{return_type}
+#{function_name(type)}(#{argument_list})
+{
+    HEADER
+
+    prev_common_bytes = []
+    prev_n_common_bytes = 0
+    first_group = true
+    byte_size_groups.keys.sort.each do |common_bytes|
+      chars = byte_size_groups[common_bytes]
+      chars_bytes = chars.collect(&:bytes).sort
+      min = chars_bytes.first.last
+      max = chars_bytes.last.last
+      n_common_bytes = 0
+      if common_bytes.empty?
+        indent = "  "
+        yield(:no_common_bytes, indent, chars, chars_bytes)
+      else
+        if first_group
+          @output.puts(<<-BODY)
+  {
+          BODY
+        end
+
+        found_different_byte = false
+        common_bytes.each_with_index do |common_byte, i|
+          unless found_different_byte
+            if prev_common_bytes[i] == common_byte
+              n_common_bytes += 1
+              next
+            end
+            found_different_byte = true
+          end
+          indent = "  " * i
+          # p [i, prev_common_bytes.collect{|x| "%#04x" % x}, common_bytes.collect{|x| "%#04x" % x}, "%#04x" % common_byte, n_common_bytes, prev_n_common_bytes]
+          # TODO: The following code may be able to be simplified.
+          if prev_common_bytes[i].nil?
+            # p nil
+            @output.puts(<<-BODY)
+    #{indent}switch (#{char_variable}[#{i}]) {
+            BODY
+          elsif i < prev_n_common_bytes
+            # p :prev
+            @output.puts(<<-BODY)
+    #{indent}  default :
+    #{indent}    break;
+    #{indent}  }
+    #{indent}  break;
+            BODY
+          elsif n_common_bytes < prev_n_common_bytes
+            # p :common_prev
+            @output.puts(<<-BODY)
+    #{indent}switch (#{char_variable}[#{i}]) {
+            BODY
+          else
+            # p :else
+            prev_common_bytes.size.downto(common_bytes.size + 1) do |j|
+              sub_indent = "  " * (j - 1)
+              @output.puts(<<-BODY)
+    #{indent}#{sub_indent}default :
+    #{indent}#{sub_indent}  break;
+    #{indent}#{sub_indent}}
+    #{indent}#{sub_indent}break;
+              BODY
+            end
+          end
+          @output.puts(<<-BODY)
+    #{indent}case #{"%#04x" % common_byte} :
+          BODY
+        end
+
+        n = chars_bytes.first.size - 1
+        indent = "    " + ("  " * common_bytes.size)
+        yield(:have_common_bytes, indent, chars, chars_bytes, n, common_bytes)
+      end
+
+      prev_common_bytes = common_bytes
+      prev_n_common_bytes = n_common_bytes
+      first_group = false
+    end
+
+    # p [prev_common_bytes.collect{|x| "%#04x" % x}, prev_n_common_bytes]
+
+    (prev_common_bytes.size - 1).step(0, -1) do |i|
+      indent = "  " * i
+      @output.puts(<<-BODY)
+    #{indent}default :
+    #{indent}  break;
+    #{indent}}
+      BODY
+      if i > 0
+        @output.puts(<<-BODY)
+    #{indent}break;
+        BODY
+      end
+    end
+
+    @output.puts(<<-FOOTER)
+  }
+
+  return #{default};
+}
+    FOOTER
+  end
+
+  def generate_char_converter(type,
+                              function_type,
+                              char_map,
+                              default,
+                              return_type,
+                              options={},
+                              &converter)
+    byte_size_groups = char_map.keys.group_by do |from|
+      bytes = from.bytes
+      bytes[0..-2]
+    end
+
+    generate_char_convert_tables(type,
+                                 return_type,
+                                 byte_size_groups,
+                                 &converter)
+
+    char_variable = "utf8"
+    generate_char_convert_function(function_type,
+                                   "const unsigned char *#{char_variable}",
+                                   char_variable,
+                                   default,
+                                   return_type,
+                                   byte_size_groups,
+                                   options) do |state, *args|
+      case state
+      when :no_common_bytes
+        indent, chars, chars_bytes = args
+        if chars.size == 1
+          char = chars[0]
+          char_byte = chars_bytes.first.first
+          value = yield(char)
+          @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[0] < 0x80) {
+#{indent}  if (#{char_variable}[0] == #{"%#04x" % char_byte}) {
+#{indent}    return #{value};
+#{indent}  } else {
+#{indent}    return #{default};
+#{indent}  }
+#{indent}} else {
+          BODY
+        else
+          min = chars_bytes.first.first
+          max = chars_bytes.last.first
+          @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[0] < 0x80) {
+#{indent}  if (#{char_variable}[0] >= #{"%#04x" % min} &&
+#{indent}      #{char_variable}[0] <= #{"%#04x" % max}) {
+#{indent}    return #{table_name(type, [])}[#{char_variable}[0] - #{"%#04x" % min}];
+#{indent}  } else {
+#{indent}    return #{default};
+#{indent}  }
+#{indent}} else {
+          BODY
+        end
+      when :have_common_bytes
+        indent, chars, chars_bytes, n, common_bytes = args
+        if chars.size == 1
+          char = chars[0]
+          char_byte = chars_bytes.first.last
+          value = yield(char)
+          @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[#{n}] == #{"%#04x" % char_byte}) {
+#{indent}  return #{value};
+#{indent}}
+#{indent}break;
+          BODY
+        else
+          sorted_chars = chars.sort
+          min = chars_bytes.first.last
+          max = chars_bytes.last.last
+          all_values = (min..max).collect do |last_byte|
+            char = (common_bytes + [last_byte]).pack("c*")
+            char.force_encoding("UTF-8")
+            yield(char)
+          end
+          if all_values.uniq.size == 1
+            value = all_values.first
+          else
+            value = "#{table_name(type, common_bytes)}[#{char_variable}[#{n}] - #{"%#04x" % min}]"
+          end
+          last_n_bits_for_char_in_utf8 = 6
+          max_n_chars_in_byte = 2 ** last_n_bits_for_char_in_utf8
+          if all_values.size == max_n_chars_in_byte
+            @output.puts(<<-BODY)
+#{indent}return #{value};
+            BODY
+          else
+            @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[#{n}] >= #{"%#04x" % min} &&
+#{indent}    #{char_variable}[#{n}] <= #{"%#04x" % max}) {
+#{indent}  return #{value};
+#{indent}}
+#{indent}break;
+            BODY
+          end
         end
-        br = false
       end
-      unless br
-        file.puts bl + "  return #{$lv};"
-        file.puts bl + '  break;'
+    end
+  end
+
+  def generate_blockcode_char_type(block_codes)
+    default = "GRN_CHAR_OTHERS"
+
+    char_types = {}
+    current_type = default
+    prev_char = nil
+    block_codes.keys.sort.each do |char|
+      type = block_codes[char]
+      if current_type != default
+        prev_code_point = prev_char.codepoints[0]
+        code_point = char.codepoints[0]
+        (prev_code_point...code_point).each do |target_code_point|
+          target_char = [target_code_point].pack("U*")
+          char_types[target_char] = current_type
+        end
       end
-      h = h2[k]
-      file.printf("#{bl}case 0x%02X :\n", k)
-      if h.keys.join =~ /^\x80*$/
-        $lv, = h.values
-        br = false
+      current_type = type
+      prev_char = char
+    end
+    unless current_type == default
+      raise "TODO: Consider the max unicode character"
+      max_unicode_char = "\u{10ffff}"
+      (prev_char..max_unicode_char).each do |target_char|
+        char_types[target_char] = current_type
+      end
+    end
+
+    generate_char_converter("char_type",
+                            "char_type",
+                            char_types,
+                            default,
+                            "grn_char_type") do |char|
+      char_types[char] || default
+    end
+  end
+
+  def generate_decompose(decompose_map)
+    default = "NULL"
+    generate_char_converter("decompose",
+                            "decompose",
+                            decompose_map,
+                            default,
+                            "const char *") do |from|
+      to = decompose_map[from]
+      if to
+        escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
+        "\"#{escaped_value}\""
       else
-        gen_bc(file, h, level + 1)
-        file.puts bl + '  break;'
-        br = true
+        default
       end
-      lk = k + 1
-    }
-    file.puts bl + 'default :'
-    file.puts bl + "  return #{$lv};"
-    file.puts bl + '  break;'
-    file.puts bl + '}'
+    end
+  end
+
+  def generate_compose(compose_map)
+    # require "pp"
+    # p compose_map.size
+    # pp compose_map.keys.group_by {|x| x.chars[1]}.size
+    # pp compose_map.keys.group_by {|x| x.chars[1]}.collect {|k, vs| [k, k.codepoints, vs.size, vs.group_by {|x| x.chars[0].bytesize}.collect {|k2, vs2| [k2, vs2.size]}]}
+    # pp compose_map.keys.group_by {|x| x.chars[0].bytesize}.collect {|k, vs| [k, vs.size]}
+    # pp compose_map
+
+    suffix_char_map = {}
+    compose_map.each do |source, destination|
+      chars = source.chars
+      if chars.size != 2
+        STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
+        return
+      end
+      prefix, suffix = chars
+      suffix_char_map[suffix] ||= {}
+      suffix_char_map[suffix][prefix] = destination
+    end
+
+    suffix_char_map.each do |suffix, prefix_char_map|
+      suffix_bytes = suffix.bytes.collect {|byte| "%02x" % byte}.join("")
+      default = "NULL"
+      generate_char_converter("compose_prefix_#{suffix_bytes}",
+                              "compose_prefix_#{suffix_bytes}",
+                              prefix_char_map,
+                              default,
+                              "const char *",
+                              :internal => true) do |prefix|
+        to = prefix_char_map[prefix]
+        if to
+          escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
+          "\"#{escaped_value}\""
+        else
+          default
+        end
+      end
+    end
+
+
+    char_variable = "suffix_utf8"
+    argument_list =
+      "const unsigned char *prefix_utf8, " +
+      "const unsigned char *#{char_variable}"
+    default = "NULL"
+    byte_size_groups = suffix_char_map.keys.group_by do |from|
+      bytes = from.bytes
+      bytes[0..-2]
+    end
+    generate_char_convert_function("compose",
+                                   argument_list,
+                                   char_variable,
+                                   default,
+                                   "const char *",
+                                   byte_size_groups) do |type, *args|
+      case type
+      when :no_common_bytes
+        indent, chars, chars_bytes = args
+        @output.puts(<<-BODY)
+#{indent}switch (#{char_variable}[0]) {
+        BODY
+        chars.each do |char|
+          suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
+          type = "compose_prefix_#{suffix_bytes}"
+          @output.puts(<<-BODY)
+#{indent}case #{"%#04x" % char.bytes.last} :
+#{indent}  return #{function_name(type)}(prefix_utf8);
+          BODY
+        end
+        @output.puts(<<-BODY)
+#{indent}default :
+#{indent}  return #{default};
+#{indent}}
+#{indent}break;
+        BODY
+      when :have_common_bytes
+        indent, chars, chars_bytes, n, common_bytes = args
+        @output.puts(<<-BODY)
+#{indent}switch (#{char_variable}[#{n}]) {
+        BODY
+        chars.each do |char|
+          suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
+          type = "compose_prefix_#{suffix_bytes}"
+          @output.puts(<<-BODY)
+#{indent}case #{"%#04x" % char.bytes.last} :
+#{indent}  return #{function_name(type)}(prefix_utf8);
+          BODY
+        end
+        @output.puts(<<-BODY)
+#{indent}default :
+#{indent}  return #{default};
+#{indent}}
+#{indent}break;
+        BODY
+      end
+    end
+  end
+
+  def to_bytes_map(char_map)
+    bytes_map = {}
+    char_map.each_key do |from|
+      parent = bytes_map
+      from.bytes[0..-2].each do |byte|
+        parent[byte] ||= {}
+        parent = parent[byte]
+      end
+      parent[from.bytes.last] = char_map[from]
+    end
+    bytes_map
   end
 end
 
-def generate_blockcode_char_type(file, option)
+def create_bc(option)
   bc = {}
   open("|./icudump --#{option}").each{|l|
     src,_,code = l.chomp.split("\t")
-    str = src.split(':').collect{|c| format("%c", c.hex)}.join
+    str = src.split(':').collect(&:hex).pack("c*")
+    str.force_encoding("UTF-8")
     bc[str] = code
   }
-  $lv = 0
-  gen_bc(file, bc, 0)
+  bc
 end
 
 def ccpush(hash, src, dst)
@@ -104,7 +725,7 @@ end
 
 def subst(hash, str)
   cand = nil
-  src = str.split(//)
+  src = str.chars
   for i in 0..src.size-1
     h = hash
     for j in i..src.size-1
@@ -112,7 +733,7 @@ def subst(hash, str)
       h = h[head]
       break unless h
       if h[nil]
-        cand = src[0,i].to_s + h[nil] + src[j + 1..-1].to_s
+        cand = src[0,i].join("") + h[nil] + src[j + 1..-1].join("")
       end
     end
     return cand if cand
@@ -120,7 +741,7 @@ def subst(hash, str)
   return str
 end
 
-def map_entry(map1, cc, src, dst)
+def map_entry(decompose, cc, src, dst)
   dst.downcase! unless $case_sensitive
   loop {
     dst2 = subst(cc, dst)
@@ -130,43 +751,43 @@ def map_entry(map1, cc, src, dst)
   unless $keep_space
     dst = $1 if dst =~ /^ +([^ ].*)$/
   end
-  map1[src] = dst if src != dst
+  decompose[src] = dst if src != dst
 end
 
-def create_map1()
+def create_decompose_map()
   cc = {}
   open('|./icudump --cc').each{|l|
     _,src,dst = l.chomp.split("\t")
     if cc[src]
       STDERR.puts "caution: ambiguous mapping #{src}|#{cc[src]}|#{dst}" if cc[src] != dst
     end
-    ccpush(cc, src.split(//), dst)
+    ccpush(cc, src.chars, dst)
   }
-  map1 = {}
+  decompose_map = {}
   open('|./icudump --nfkd').each{|l|
     n,src,dst = l.chomp.split("\t")
-    map_entry(map1, cc, src, dst)
+    map_entry(decompose_map, cc, src, dst)
   }
   if File.readable?(CUSTOM_RULE_PATH)
     open(CUSTOM_RULE_PATH).each{|l|
       src,dst = l.chomp.split("\t")
-      map_entry(map1, cc, src, dst)
+      map_entry(decompose_map, cc, src, dst)
     }
   end
   unless $case_sensitive
     for c in 'A'..'Z'
-      map1[c] = c.downcase
+      decompose_map[c] = c.downcase
     end
   end
-  return map1
+  return decompose_map
 end
 
-def create_map2(map1)
+def create_compose_map(decompose_map)
   cc = {}
   open('|./icudump --cc').each{|l|
     _,src,dst = l.chomp.split("\t")
-    src = src.split(//).collect{|c| map1[c] || c}.join
-    dst = map1[dst] || dst
+    src = src.chars.collect{|c| decompose_map[c] || c}.join
+    dst = decompose_map[dst] || dst
     if cc[src] && cc[src] != dst
       STDERR.puts("caution: inconsitent mapping '#{src}' => '#{cc[src]}'|'#{dst}'")
     end
@@ -177,14 +798,15 @@ def create_map2(map1)
     cc2 = {}
     cc.each {|src,dst|
       src2 = src
-      chars = src.split(//)
+      chars = src.chars
       l = chars.size - 1
       for i in 0..l
         for j in i..l
           next if i == 0 && j == l
           str = chars[i..j].join
-          if map1[str]
-            STDERR.printf("caution: recursive mapping '%s'=>'%s'\n", str, map1[str])
+          if decompose_map[str]
+            STDERR.printf("caution: recursive mapping '%s'=>'%s'\n",
+                          str, decompose_map[str])
           end
           if cc[str]
             src2 = (i > 0 ? chars[0..i-1].join : '') + cc[str] + (j < l ? chars[j+1..l].join : '')
@@ -202,134 +824,42 @@ def create_map2(map1)
   return cc
 end
 
-def generate_map1(file, hash, level)
-  bl = ' ' * ((level + 0) * 2)
-  if hash['']
-    dst = ''
-    hash[''].each_byte{|b| dst << format('\x%02X', b)}
-    file.puts "#{bl}return \"#{dst}\";"
-    hash.delete('')
-  end
-  return if hash.empty?
-  h2 = {}
-  hash.each{|key,val|
-    head = key[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
-      generate_map1(file, val, level + 1)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (str[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      generate_map1(file, h2[k], level + 1)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
+######## main #######
 
-def gen_map2_sub2(file, hash, level, indent)
-  bl = ' ' * ((level + indent + 0) * 2)
-  if hash['']
-    file.print "#{bl}return \""
-    hash[''].each_byte{|b| file.printf('\x%02X', b)}
-    file.puts "\";"
-    hash.delete('')
+generator_class = SwitchGenerator
+ARGV.each{|arg|
+  case arg
+  when /-*c/i
+    $case_sensitive = true
+  when /-*s/i
+    $keep_space = true
+  when "--impl=switch"
+    generator_class = SwitchGenerator
+  when "--impl=table"
+    generator_class = TableGenerator
   end
-  return if hash.empty?
-
-  h2 = {}
-  hash.each{|key,val|
-    head = key[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
+}
 
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
-      gen_map2_sub2(file, val, level + 1, indent)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (prefix[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      gen_map2_sub2(file, h2[k], level + 1, indent)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
+STDERR.puts('compiling icudump')
+system('cc -Wall -O3 -o icudump -I/tmp/local/include -L/tmp/local/lib icudump.c -licuuc -licui18n')
 
-def gen_map2_sub(file, hash, level)
-  bl = ' ' * ((level + 0) * 2)
-  if hash['']
-    gen_map2_sub2(file, hash[''], 0, level)
-    hash.delete('')
-  end
-  return if hash.empty?
-  h2 = {}
-  hash.each{|key,val|
-    head = key[0]
-    rest = key[1..-1]
-    if h2[head]
-      h2[head][rest] = val
-    else
-      h2[head] = {rest => val}
-    end
-  }
-  if h2.size == 1
-    h2.each{|key,val|
-      file.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
-      gen_map2_sub(file, val, level + 1)
-      file.puts bl + '}'
-    }
-  else
-    file.puts "#{bl}switch (suffix[#{level}]) {"
-    h2.keys.sort.each{|k|
-      file.printf("#{bl}case 0x%02X :\n", k)
-      gen_map2_sub(file, h2[k], level + 1)
-      file.puts("#{bl}  break;")
-    }
-    file.puts bl + '}'
-  end
-end
+STDERR.puts('getting Unicode version')
+unicode_version = `./icudump --version`.strip.gsub(".", "")
 
-def generate_map2(file, map2)
-  suffix = {}
-  map2.each{|src,dst|
-    chars = src.split(//)
-    if chars.size != 2
-      STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
-    end
-    s = chars.pop
-    if suffix[s]
-      suffix[s][chars.join] = dst
-    else
-      suffix[s] = {chars.join=>dst}
-    end
-  }
-  gen_map2_sub(file, suffix, 0)
-end
+STDERR.puts('creating bc..')
+bc = create_bc("gc")
 
-template = <<END
+STDERR.puts('creating decompose map..')
+decompose_map = create_decompose_map()
+
+STDERR.puts('creating compose map..')
+compose_map = create_compose_map(decompose_map)
+
+File.open("nfkc#{unicode_version}.c", "w") do |output|
+  output.puts(<<-HEADER)
 /* -*- c-basic-offset: 2 -*- */
-/* Copyright(C) 2010 Brazil
+/*
+  Copyright(C) 2010-2016 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -343,76 +873,25 @@ template = <<END
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
 
-don't edit this file by hand. it generated automatically by nfkc.rb
+/*
+  Don't edit this file by hand. it generated automatically by nfkc.rb.
 */
 
-#include "nfkc.h"
+#include "grn.h"
+#include "grn_nfkc.h"
+#include <groonga/nfkc.h>
 
 #ifdef GRN_WITH_NFKC
+  HEADER
 
-unsigned char
-grn_nfkc_char_type(const unsigned char *str)
-{
-%  return -1;
-}
+  generator = generator_class.new(unicode_version, output)
+  generator.generate(bc, decompose_map, compose_map)
 
-const char *
-grn_nfkc_map1(const unsigned char *str)
-{
-%  return 0;
-}
-
-const char *
-grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix)
-{
-%  return 0;
-}
+  output.puts(<<-FOOTER)
 
 #endif /* GRN_WITH_NFKC */
 
-END
-
-######## main #######
-
-ARGV.each{|arg|
-  case arg
-  when /-*c/i
-    $case_sensitive = true
-  when /-*s/i
-    $keep_space = true
-  end
-}
-
-STDERR.puts('compiling icudump')
-system('cc -Wall -O3 -o icudump icudump.c -licui18n')
-
-STDERR.puts('creating map1..')
-map1 = create_map1()
-
-STDERR.puts('creating map2..')
-map2 = create_map2(map1)
-
-outf = open('nfkc.c', 'w')
-
-tmps = template.split(/%/)
-
-#STDERR.puts('generating block code..')
-#outf.print(tmps.shift)
-#generate_blockcode_char_type(outf, 'bc')
-
-STDERR.puts('generating char type code..')
-outf.print(tmps.shift)
-generate_blockcode_char_type(outf, 'gc')
-
-STDERR.puts('generating map1 code..')
-outf.print(tmps.shift)
-generate_map1(outf, map1, 0)
-
-STDERR.puts('generating map2 code..')
-outf.print(tmps.shift)
-generate_map2(outf, map2)
-
-outf.print(tmps.shift)
-outf.close
-STDERR.puts('done.')
+  FOOTER
+end