summaryrefslogtreecommitdiff
path: root/test/racc/assets/namae.y
diff options
context:
space:
mode:
Diffstat (limited to 'test/racc/assets/namae.y')
-rw-r--r--test/racc/assets/namae.y302
1 files changed, 302 insertions, 0 deletions
diff --git a/test/racc/assets/namae.y b/test/racc/assets/namae.y
new file mode 100644
index 0000000000..0378345fef
--- /dev/null
+++ b/test/racc/assets/namae.y
@@ -0,0 +1,302 @@
+# -*- ruby -*-
+# vi: set ft=ruby :
+
+# Copyright (C) 2012 President and Fellows of Harvard College
+# Copyright (C) 2013-2014 Sylvester Keil
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# The views and conclusions contained in the software and documentation are
+# those of the authors and should not be interpreted as representing official
+# policies, either expressed or implied, of the copyright holder.
+
+class Namae::Parser
+
+token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
+
+expect 0
+
+rule
+
+ # A list of names: empty input yields an empty array; otherwise each name
+ # is appended to the array, with AND tokens separating the entries.
+ names : { result = [] }
+ | name { result = [val[0]] }
+ | names AND name { result = val[0] << val[2] }
+
+ # A single name. A lone word is stored as the given name; a single word
+ # after an honorific is stored as the family name. In the
+ # "honorific display_order" form the honorific is merged into the name
+ # built by display_order.
+ # NOTE(review): the exact semantics of Name#merge are defined outside this
+ # file -- confirm.
+ name : word { result = Name.new(:given => val[0]) }
+ | display_order
+ | honorific word { result = val[0].merge(:family => val[1]) }
+ | honorific display_order { result = val[1].merge(val[0]) }
+ | sort_order
+
+ # An honorific is either an APPELLATION or a TITLE token, wrapped in a Name
+ # under the :appellation or :title key respectively.
+ honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
+ | TITLE { result = Name.new(:title => val[0]) }
+
+ # Names written with the given part first. Each alternative maps the matched
+ # components (given words, optional nickname, optional particle, family
+ # part, optional suffixes and titles) onto the keys passed to Name.new.
+ # Alternatives without opt_suffices/opt_titles leave those keys unset.
+ display_order : u_words word opt_suffices opt_titles
+ {
+ result = Name.new(:given => val[0], :family => val[1],
+ :suffix => val[2], :title => val[3])
+ }
+ | u_words NICK last opt_suffices opt_titles
+ {
+ result = Name.new(:given => val[0], :nick => val[1],
+ :family => val[2], :suffix => val[3], :title => val[4])
+ }
+ | u_words NICK von last opt_suffices opt_titles
+ {
+ result = Name.new(:given => val[0], :nick => val[1],
+ :particle => val[2], :family => val[3],
+ :suffix => val[4], :title => val[5])
+ }
+ | u_words von last
+ {
+ result = Name.new(:given => val[0], :particle => val[1],
+ :family => val[2])
+ }
+ | von last
+ {
+ result = Name.new(:particle => val[0], :family => val[1])
+ }
+
+ # Names written family part first, followed by a COMMA and the `first`
+ # part. `first` produces a two-element array [suffix, given], which is why
+ # the actions index into it with [0] and [1].
+ # The second argument to Name.new is true exactly when a suffix was found.
+ # NOTE(review): what Name.new does with that flag is defined outside this
+ # file -- confirm.
+ # In the last alternative the leading u_words and the von part are joined
+ # with a space and stored together as the particle.
+ sort_order : last COMMA first
+ {
+ result = Name.new({ :family => val[0], :suffix => val[2][0],
+ :given => val[2][1] }, !!val[2][0])
+ }
+ | von last COMMA first
+ {
+ result = Name.new({ :particle => val[0], :family => val[1],
+ :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
+ }
+ | u_words von last COMMA first
+ {
+ result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
+ :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
+ }
+ ;
+
+ # Particle: one lower-case word, optionally extended by further words as
+ # long as the sequence ends in a lower-case word. Parts are space-joined.
+ von : LWORD
+ | von LWORD { result = val.join(' ') }
+ | von u_words LWORD { result = val.join(' ') }
+
+ # Family part: a single lower-case word or a run of u_words.
+ last : LWORD | u_words
+
+ # The part after the comma in sort order. Always yields a two-element
+ # array [suffix, given]; either element may be nil.
+ first : opt_words { result = [nil,val[0]] }
+ | words opt_comma suffices { result = [val[2],val[0]] }
+ | suffices { result = [val[0],nil] }
+ | suffices COMMA words { result = [val[0],val[2]] }
+
+ # One or more u_word tokens, space-joined.
+ u_words : u_word
+ | u_words u_word { result = val.join(' ') }
+
+ # A UWORD or PWORD token.
+ u_word : UWORD | PWORD
+
+ # One or more words of any kind, space-joined.
+ words : word
+ | words word { result = val.join(' ') }
+
+ # Optional variants: may match nothing at all.
+ opt_comma : /* empty */ | COMMA
+ opt_words : /* empty */ | words
+
+ # Any single word token.
+ word : LWORD | UWORD | PWORD
+
+ opt_suffices : /* empty */ | suffices
+
+ # One or more SUFFIX tokens, space-joined.
+ suffices : SUFFIX
+ | suffices SUFFIX { result = val.join(' ') }
+
+ opt_titles : /* empty */ | titles
+
+ # One or more TITLE tokens, space-joined.
+ titles : TITLE
+ | titles TITLE { result = val.join(' ') }
+
+---- header
+require 'singleton'
+require 'strscan'
+
+---- inner
+
+ include Singleton
+
+ attr_reader :options, :input
+
+ def initialize
+ @input, @options = StringScanner.new(''), {
+ :debug => false,
+ :prefer_comma_as_separator => false,
+ :comma => ',',
+ :stops => ',;',
+ :separator => /\s*(\band\b|\&|;)\s*/i,
+ :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
+ :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
+ :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
+ }
+ end
+
+ # Truthy when debugging is switched on, either through the :debug option
+ # or because the DEBUG environment variable is set.
+ def debug?
+ options[:debug] || ENV['DEBUG']
+ end
+
+ # Pattern recognised as the separator between two names (:separator option).
+ def separator
+ options[:separator]
+ end
+
+ # The comma string; it is interpolated into the tokenizer's comma pattern
+ # (:comma option).
+ def comma
+ options[:comma]
+ end
+
+ # Characters that terminate a word; interpolated into the tokenizer's
+ # word patterns as an excluded character set (:stops option).
+ def stops
+ options[:stops]
+ end
+
+ # Pattern recognised as a title (:title option).
+ def title
+ options[:title]
+ end
+
+ # Pattern recognised as a name suffix (:suffix option).
+ def suffix
+ options[:suffix]
+ end
+
+ # Pattern recognised as an appellation (:appellation option).
+ def appellation
+ options[:appellation]
+ end
+
+ # Whether a comma may be treated as a separator between names once a full
+ # name has been seen (:prefer_comma_as_separator option).
+ def prefer_comma_as_separator?
+ options[:prefer_comma_as_separator]
+ end
+
+ def parse(input)
+ parse!(input)
+ rescue => e
+ warn e.message if debug?
+ []
+ end
+
+ def parse!(string)
+ input.string = normalize(string)
+ reset
+ do_parse
+ end
+
+ def normalize(string)
+ string = string.strip
+ string
+ end
+
+ def reset
+ @commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
+ self
+ end
+
+ private
+
+ def stack
+ @vstack || @racc_vstack || []
+ end
+
+ def last_token
+ stack[-1]
+ end
+
+ def consume_separator
+ return next_token if seen_separator?
+ @commas, @words, @initials, @suffices = 0, 0, 0, 0
+ [:AND, :AND]
+ end
+
+ def consume_comma
+ @commas += 1
+ [:COMMA, :COMMA]
+ end
+
+ def consume_word(type, word)
+ @words += 1
+
+ case type
+ when :UWORD
+ @initials += 1 if word =~ /^[[:upper:]]+\b/
+ when :SUFFIX
+ @suffices += 1
+ end
+
+ [type, word]
+ end
+
+ def seen_separator?
+ !stack.empty? && last_token == :AND
+ end
+
+ def suffix?
+ !@suffices.zero? || will_see_suffix?
+ end
+
+ def will_see_suffix?
+ input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
+ end
+
+ def will_see_initial?
+ input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
+ end
+
+ def seen_full_name?
+ prefer_comma_as_separator? && @words > 1 &&
+ (@initials > 0 || !will_see_initial?) && !will_see_suffix?
+ end
+
+ # Tokenizer: returns the next [type, value] pair read from the scanner, or
+ # nil once the input is missing or exhausted. racc's do_parse pulls tokens
+ # through this method. The branches are tried in order and the first
+ # pattern that matches at the scan position wins, so their order matters.
+ #
+ # Raises ArgumentError when nothing matches at the current position.
+ def next_token
+ case
+ when input.nil?, input.eos?
+ nil
+ when input.scan(separator)
+ consume_separator
+ when input.scan(/\s*#{comma}\s*/)
+ # A comma stays a COMMA token when it is the first comma and no full name
+ # has been seen yet, or when it is the second comma and a suffix has been
+ # seen or is coming up. Any other comma separates two names.
+ if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
+ consume_comma
+ else
+ consume_separator
+ end
+ when input.scan(/\s+/)
+ # Skip whitespace and return whatever follows it.
+ next_token
+ when input.scan(title)
+ consume_word(:TITLE, input.matched.strip)
+ when input.scan(suffix)
+ consume_word(:SUFFIX, input.matched.strip)
+ when input.scan(appellation)
+ # Appellations are not counted as words (consume_word is not used).
+ [:APPELLATION, input.matched.strip]
+ # Word patterns: optional leading {...} groups (each optionally preceded
+ # by a backslash command), then an upper-case letter (UWORD) or a
+ # lower-case letter (LWORD), running up to whitespace or a stop character.
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
+ consume_word(:UWORD, input.matched)
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
+ consume_word(:LWORD, input.matched)
+ # PWORD: a word that begins with a {...} group but matched neither of the
+ # two patterns above.
+ when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
+ consume_word(:PWORD, input.matched)
+ # NICK: text in single or double quotes; the quotes are stripped.
+ when input.scan(/('[^'\n]+')|("[^"\n]+")/)
+ consume_word(:NICK, input.matched[1...-1])
+ else
+ raise ArgumentError,
+ "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
+ end
+ end
+
+ def on_error(tid, value, stack)
+ raise ArgumentError,
+ "Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
+ end
+
+# -*- racc -*-