1 files changed, 302 insertions, 0 deletions
diff --git a/test/racc/assets/namae.y b/test/racc/assets/namae.y
new file mode 100644
index 0000000000..0378345fef
--- /dev/null
+++ b/test/racc/assets/namae.y
@@ -0,0 +1,302 @@
+# -*- ruby -*-
+# vi: set ft=ruby :
+
+# Copyright (C) 2012 President and Fellows of Harvard College
+# Copyright (C) 2013-2014 Sylvester Keil
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and documentation are
+# those of the authors and should not be interpreted as representing official
+# policies, either expressed or implied, of the copyright holder.
+
+# Grammar for personal names. The lexer in the "inner" section below feeds
+# this grammar with the terminals declared by `token`.
+class Namae::Parser
+
+# Terminals emitted by #next_token:
+#   COMMA       - a comma that belongs to the current name
+#   UWORD       - a word whose first letter (after any leading {...} groups)
+#                 is upper case
+#   LWORD       - a word whose first letter (after any leading {...} groups)
+#                 is lower case
+#   PWORD       - a word that starts with a {...} group
+#   NICK        - text enclosed in single or double quotes
+#   AND         - a separator between two names
+#   APPELLATION - text matched by the :appellation option
+#   TITLE       - text matched by the :title option
+#   SUFFIX      - text matched by the :suffix option
+token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
+
+# Declares that zero shift/reduce conflicts are expected from this grammar.
+expect 0
+
+rule
+
+  # A possibly empty list of names separated by AND tokens; the result is an
+  # Array with one entry per name. As the first rule it is racc's default
+  # start symbol (no explicit `start` declaration is visible here).
+  names :                { result = [] }
+        | name           { result = [val[0]] }
+        | names AND name { result = val[0] << val[2] }
+
+  # A single name. Alternatives without an action pass their only value
+  # through unchanged (racc's default action). A lone word is taken to be the
+  # given name; a word following an honorific is taken to be the family name;
+  # an honorific followed by a display-order name is merged into that name.
+  name : word            { result = Name.new(:given => val[0]) }
+       | display_order
+       | honorific word          { result = val[0].merge(:family => val[1]) }
+       | honorific display_order { result = val[1].merge(val[0]) }
+       | sort_order
+
+  # An appellation or title in front of a name, wrapped in a Name that
+  # carries only that one attribute.
+  honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
+            | TITLE       { result = Name.new(:title => val[0]) }
+
+  # Names written with the given name(s) first. The alternatives cover, in
+  # order: given + family with optional suffixes/titles; the same with a
+  # quoted nickname between given and family; nickname plus particle (von);
+  # given + particle + family; and particle + family without a given name.
+  display_order : u_words word opt_suffices opt_titles
+       {
+         result = Name.new(:given => val[0], :family => val[1],
+           :suffix => val[2], :title => val[3])
+       }
+       | u_words NICK last opt_suffices opt_titles
+       {
+         result = Name.new(:given => val[0], :nick => val[1],
+           :family => val[2], :suffix => val[3], :title => val[4])
+       }
+       | u_words NICK von last opt_suffices opt_titles
+       {
+         result = Name.new(:given => val[0], :nick => val[1],
+           :particle => val[2], :family => val[3],
+           :suffix => val[4], :title => val[5])
+       }
+       | u_words von last
+       {
+         result = Name.new(:given => val[0], :particle => val[1],
+          :family => val[2])
+       }
+       | von last
+       {
+         result = Name.new(:particle => val[0], :family => val[1])
+       }
+
+  # Names written with the family name first, followed by a comma and the
+  # remaining parts. `first` yields a [suffix, given] pair. The second
+  # argument passed to Name.new is true exactly when a suffix was present;
+  # what Name does with it is defined outside this file.
+  # In the last alternative the leading u_words and the von part are joined
+  # with a space and together form the particle.
+  sort_order : last COMMA first
+       {
+         result = Name.new({ :family => val[0], :suffix => val[2][0],
+           :given => val[2][1] }, !!val[2][0])
+       }
+       | von last COMMA first
+       {
+         result = Name.new({ :particle => val[0], :family => val[1],
+           :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
+       }
+       | u_words von last COMMA first
+       {
+         result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
+           :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
+       }
+       ;
+
+  # A particle: starts and ends with an LWORD and may contain further LWORDs
+  # or u_words in between. Multi-word particles are joined with one space.
+  von : LWORD
+      | von LWORD         { result = val.join(' ') }
+      | von u_words LWORD { result = val.join(' ') }
+
+  # A family name: either a single LWORD or a run of u_words.
+  last : LWORD | u_words
+
+  # The part after the comma of a sort-order name. Always yields a
+  # two-element Array [suffix, given]; either element may be nil. Suffixes
+  # may come after the given words (optionally comma-separated) or before
+  # them (comma-separated).
+  first : opt_words                 { result = [nil,val[0]] }
+        | words opt_comma suffices  { result = [val[2],val[0]] }
+        | suffices                  { result = [val[0],nil] }
+        | suffices COMMA words      { result = [val[0],val[2]] }
+
+  # One or more u_word tokens, joined with a single space.
+  u_words : u_word
+          | u_words u_word { result = val.join(' ') }
+
+  # A word that is not an LWORD: upper-case (UWORD) or brace-protected (PWORD).
+  u_word : UWORD | PWORD
+
+  # One or more words of any kind, joined with a single space.
+  words : word
+        | words word { result = val.join(' ') }
+
+  # Optional variants; the empty alternative has no action.
+  opt_comma : /* empty */ | COMMA
+  opt_words : /* empty */ | words
+
+  # Any single word token, regardless of case or protection.
+  word : LWORD | UWORD | PWORD
+
+  # Zero or more SUFFIX tokens.
+  opt_suffices : /* empty */ | suffices
+
+  # One or more SUFFIX tokens, joined with a single space.
+  suffices : SUFFIX
+           | suffices SUFFIX { result = val.join(' ') }
+
+  # Zero or more TITLE tokens.
+  opt_titles : /* empty */ | titles
+
+  # One or more TITLE tokens, joined with a single space.
+  titles : TITLE
+         | titles TITLE { result = val.join(' ') }
+
+---- header
+require 'singleton'
+require 'strscan'
+
+---- inner
+
+  # Mixes in the standard library's Singleton module (required in the header
+  # section).
+  include Singleton
+
+  # options - the Hash of lexer settings built in #initialize.
+  # input   - the StringScanner holding the text currently being parsed.
+  attr_reader :options, :input
+
+  def initialize
+    @input, @options = StringScanner.new(''), {
+      :debug => false,
+      :prefer_comma_as_separator => false,
+      :comma => ',',
+      :stops => ',;',
+      :separator => /\s*(\band\b|\&|;)\s*/i,
+      :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
+      :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
+      :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
+    }
+  end
+
+  def debug?
+    options[:debug] || ENV['DEBUG']
+  end
+
+  # Returns the :separator option: the pattern #next_token uses to detect the
+  # boundary between two names (by default "and", "&" or ";").
+  def separator
+    options[:separator]
+  end
+
+  # Returns the :comma option, which #next_token interpolates into its
+  # comma-matching pattern (default ',').
+  def comma
+    options[:comma]
+  end
+
+  # Returns the :stops option: the characters that, besides whitespace, end a
+  # word. #next_token interpolates it into the negated character classes of
+  # its word patterns (default ',;').
+  def stops
+    options[:stops]
+  end
+
+  # Returns the :title option: the pattern whose matches #next_token emits as
+  # TITLE tokens.
+  def title
+    options[:title]
+  end
+
+  # Returns the :suffix option: the pattern whose matches #next_token emits
+  # as SUFFIX tokens; also used by #will_see_suffix? for look-ahead.
+  def suffix
+    options[:suffix]
+  end
+
+  # Returns the :appellation option: the pattern whose matches #next_token
+  # emits as APPELLATION tokens.
+  def appellation
+    options[:appellation]
+  end
+
+  # Returns the :prefer_comma_as_separator option. It is consulted by
+  # #seen_full_name?; while it is falsy, the first comma within a name is
+  # always lexed as a COMMA token rather than as a separator.
+  def prefer_comma_as_separator?
+    options[:prefer_comma_as_separator]
+  end
+
+  def parse(input)
+    parse!(input)
+  rescue => e
+    warn e.message if debug?
+    []
+  end
+
+  def parse!(string)
+    input.string = normalize(string)
+    reset
+    do_parse
+  end
+
+  def normalize(string)
+    string = string.strip
+    string
+  end
+
+  def reset
+    @commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
+    self
+  end
+
+  private
+
+  def stack
+    @vstack || @racc_vstack || []
+  end
+
+  def last_token
+    stack[-1]
+  end
+
+  def consume_separator
+    return next_token if seen_separator?
+    @commas, @words, @initials, @suffices = 0, 0, 0, 0
+    [:AND, :AND]
+  end
+
+  def consume_comma
+    @commas += 1
+    [:COMMA, :COMMA]
+  end
+
+  def consume_word(type, word)
+    @words += 1
+
+    case type
+    when :UWORD
+      @initials += 1 if word =~ /^[[:upper:]]+\b/
+    when :SUFFIX
+      @suffices += 1
+    end
+
+    [type, word]
+  end
+
+  def seen_separator?
+    !stack.empty? && last_token == :AND
+  end
+
+  def suffix?
+    !@suffices.zero? || will_see_suffix?
+  end
+
+  def will_see_suffix?
+    input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
+  end
+
+  def will_see_initial?
+    input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
+  end
+
+  def seen_full_name?
+    prefer_comma_as_separator? && @words > 1 &&
+      (@initials > 0 || !will_see_initial?) && !will_see_suffix?
+  end
+
+  # Lexer: scans the next token from `input`.
+  #
+  # Returns a [type, value] pair, or nil when there is no input left.
+  # Raises ArgumentError when the text at the current position matches none
+  # of the patterns.
+  #
+  # The branches are tried in order, and each successful `scan` advances the
+  # scanner, so the order below is significant.
+  def next_token
+    case
+    when input.nil?, input.eos?
+      nil
+    when input.scan(separator)
+      consume_separator
+    when input.scan(/\s*#{comma}\s*/)
+      # A comma stays part of the current name when it is the first comma and
+      # no full name has been seen yet, or when it is the second comma and a
+      # suffix was read or is coming up. Any other comma separates two names.
+      if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
+        consume_comma
+      else
+        consume_separator
+      end
+    when input.scan(/\s+/)
+      # Skip whitespace and continue with the following token.
+      next_token
+    when input.scan(title)
+      consume_word(:TITLE, input.matched.strip)
+    when input.scan(suffix)
+      consume_word(:SUFFIX, input.matched.strip)
+    when input.scan(appellation)
+      # Unlike titles and suffixes, appellations bypass consume_word and
+      # therefore do not increment the word counter.
+      [:APPELLATION, input.matched.strip]
+    # Optional {...} groups (each optionally preceded by a backslash command),
+    # then an upper-case letter, then everything up to whitespace or a stop.
+    when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
+      consume_word(:UWORD, input.matched)
+    # Same shape as above, but the first letter is lower case.
+    when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
+      consume_word(:LWORD, input.matched)
+    # A word starting with a {...} group that matched neither pattern above.
+    when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
+      consume_word(:PWORD, input.matched)
+    # Single- or double-quoted text; the surrounding quotes are dropped.
+    when input.scan(/('[^'\n]+')|("[^"\n]+")/)
+      consume_word(:NICK, input.matched[1...-1])
+    else
+      raise ArgumentError,
+        "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
+    end
+  end
+
+  def on_error(tid, value, stack)
+    raise ArgumentError,
+      "Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
+  end
+
+# -*- racc -*-