1 files changed, 423 insertions, 0 deletions
diff --git a/test/racc/assets/machete.y b/test/racc/assets/machete.y
new file mode 100644
index 0000000000..ea92d47a69
--- /dev/null
+++ b/test/racc/assets/machete.y
@@ -0,0 +1,423 @@
+# Copyright (c) 2011 SUSE
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+# Racc grammar for the parser class Machete::Parser. The Ruby code after the
+# "---- inner" marker supplies the lexer (next_token) and the helpers that the
+# rule actions call to convert token text into values.
+class Machete::Parser
+
+# Terminals whose type is one of the symbols in the lexer's COMPLEX_TOKENS
+# table. Punctuation and operator tokens are not declared here; the rules
+# refer to them as quoted strings.
+token NIL
+token TRUE
+token FALSE
+token INTEGER
+token SYMBOL
+token STRING
+token REGEXP
+token ANY
+token EVEN
+token ODD
+token METHOD_NAME
+token CLASS_NAME
+
+# Parsing starts at the "expression" rule.
+start expression
+
+rule
+
+# An expression is a single primary or several primaries separated by "|".
+expression : primary
+           | expression "|" primary {
+               # When the left-hand side is already a ChoiceMatcher, the new
+               # alternative is appended (in place, via <<) to its
+               # alternatives list and a new matcher is built around that
+               # list, so a chain like a | b | c does not nest matchers.
+               # Otherwise a two-element choice is created.
+               result = if val[0].is_a?(ChoiceMatcher)
+                 ChoiceMatcher.new(val[0].alternatives << val[2])
+               else
+                 ChoiceMatcher.new([val[0], val[2]])
+               end
+             }
+
+# A primary is a node pattern, an array pattern, a literal or the ANY keyword.
+# No action is given, so the value of the chosen alternative is passed through.
+primary : node
+        | array
+        | literal
+        | any
+
+# A node pattern is a class name, optionally followed by attribute
+# constraints in angle brackets. The class name is converted to a symbol and
+# handed to NodeMatcher.new, together with the attrs hash when one is present.
+node : CLASS_NAME {
+         result = NodeMatcher.new(val[0].to_sym)
+       }
+     | CLASS_NAME "<" attrs ">" {
+         result = NodeMatcher.new(val[0].to_sym, val[2])
+       }
+
+# Comma-separated attribute constraints. Every attr yields a one-entry hash;
+# the hashes are merged left to right, so with Hash#merge a repeated attribute
+# name keeps the value given last.
+attrs : attr
+      | attrs "," attr { result = val[0].merge(val[2]) }
+
+# A single attribute constraint. Each alternative yields a one-entry hash
+# mapping the attribute name (as a symbol) to a matcher:
+#
+#   name =  expression   the value of the nested expression
+#   name ^= SYMBOL/STRING  regexp with "^" put before the escaped text
+#   name $= SYMBOL/STRING  regexp with "$" put after the escaped text
+#   name *= SYMBOL/STRING  regexp made of the escaped text only
+#   name *= REGEXP         the regexp itself
+#
+# SYMBOL operands produce a SymbolRegexpMatcher, STRING operands a
+# StringRegexpMatcher and REGEXP operands an IndifferentRegexpMatcher. The
+# operand text goes through Regexp.escape, so it is matched literally.
+# NOTE(review): "^" and "$" are line anchors in Ruby regexps, not string
+# anchors -- confirm this is intended for multi-line values.
+attr : method_name "=" expression { result = { val[0].to_sym => val[2] } }
+     | method_name "^=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new("^" + Regexp.escape(symbol_value(val[2]).to_s))
+           )
+         }
+       }
+     | method_name "$=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new(Regexp.escape(symbol_value(val[2]).to_s) + "$")
+           )
+         }
+       }
+     | method_name "*=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new(Regexp.escape(symbol_value(val[2]).to_s))
+           )
+         }
+       }
+     | method_name "^=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new("^" + Regexp.escape(string_value(val[2])))
+           )
+         }
+       }
+     | method_name "$=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new(Regexp.escape(string_value(val[2])) + "$")
+           )
+         }
+       }
+     | method_name "*=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new(Regexp.escape(string_value(val[2])))
+           )
+         }
+       }
+     | method_name "*=" REGEXP {
+         # regexp_value already returns a Regexp; Regexp.new wraps it again.
+         result = {
+           val[0].to_sym => IndifferentRegexpMatcher.new(
+             Regexp.new(regexp_value(val[2]))
+           )
+         }
+       }
+
+# Hack to overcome the fact that some tokens will lex as simple tokens, not
+# METHOD_NAME tokens, and that "reserved words" will lex as separate kinds of
+# tokens.
+#
+# Every alternative is a single token without an action, so the value of
+# method_name is that token's text; the attr rule converts it with to_sym.
+method_name : METHOD_NAME
+            | NIL
+            | TRUE
+            | FALSE
+            | ANY
+            | EVEN
+            | ODD
+            | "*"
+            | "+"
+            | "<"
+            | ">"
+            | "^"
+            | "|"
+
+# An array pattern: a possibly empty, comma-separated list of items in square
+# brackets. The list of item values is handed to ArrayMatcher.new.
+array : "[" items_opt "]" { result = ArrayMatcher.new(val[1]) }
+
+# Either nothing (an empty list) or a list of items.
+items_opt : /* empty */ { result = [] }
+          | items
+
+# Builds the list of items: the first item starts a new array and every
+# following item is appended to that same array.
+items : item           { result = [val[0]] }
+      | items "," item { result = val[0] << val[2] }
+
+# An item is an expression, optionally followed by a quantifier. The
+# quantifier's value is an array that is splatted into Quantifier.new after
+# the expression.
+item : expression
+     | expression quantifier { result = Quantifier.new(val[0], *val[1]) }
+
+# Repetition suffix for an array item. Every alternative yields a
+# three-element array, which the item rule passes on to Quantifier.new.
+# NOTE(review): the elements look like [minimum, maximum, step], with nil
+# standing for "no maximum" -- confirm against the Quantifier class.
+quantifier : "*" { result = [0, nil, 1] }
+           | "+" { result = [1, nil, 1] }
+           | "?" { result = [0, 1, 1] }
+           | "{" INTEGER "}" {
+             # A single number is used for both the first and second element.
+             result = [integer_value(val[1]), integer_value(val[1]), 1]
+           }
+           | "{" INTEGER "," "}" {
+             result = [integer_value(val[1]), nil, 1]
+           }
+           | "{" "," INTEGER "}" {
+             result = [0, integer_value(val[2]), 1]
+           }
+           | "{" INTEGER "," INTEGER "}" {
+             result = [integer_value(val[1]), integer_value(val[3]), 1]
+           }
+           | "{" EVEN "}" { result = [0, nil, 2] }
+           | "{" ODD "}"  { result = [1, nil, 2] }
+
+# Literal patterns. Keyword tokens map to the corresponding Ruby object; the
+# other tokens are converted from their source text by the *_value helpers.
+# In every case the resulting value is wrapped in a LiteralMatcher.
+literal : NIL     { result = LiteralMatcher.new(nil) }
+        | TRUE    { result = LiteralMatcher.new(true) }
+        | FALSE   { result = LiteralMatcher.new(false) }
+        | INTEGER { result = LiteralMatcher.new(integer_value(val[0])) }
+        | SYMBOL  { result = LiteralMatcher.new(symbol_value(val[0])) }
+        | STRING  { result = LiteralMatcher.new(string_value(val[0])) }
+        | REGEXP  { result = LiteralMatcher.new(regexp_value(val[0])) }
+
+# The ANY keyword yields an AnyMatcher constructed without arguments.
+any : ANY { result = AnyMatcher.new }
+
+---- inner
+
+# Mixes the Matchers module into the parser.
+# NOTE(review): presumably this is where the matcher classes used by the
+# grammar actions (NodeMatcher, ChoiceMatcher, ...) come from -- confirm.
+include Matchers
+
+# Raised by next_token for a character that starts no token and by on_error
+# for a token the grammar does not accept. Racc places the inner section in
+# the parser class body, so inside the parser this class is found before
+# Ruby's built-in ::SyntaxError.
+class SyntaxError < StandardError; end
+
+def parse(input)
+  @input = input
+  @pos = 0
+
+  do_parse
+end
+
+private
+
+def integer_value(value)
+  if value =~ /^0[bB]/
+    value[2..-1].to_i(2)
+  elsif value =~ /^0[oO]/
+    value[2..-1].to_i(8)
+  elsif value =~ /^0[dD]/
+    value[2..-1].to_i(10)
+  elsif value =~ /^0[xX]/
+    value[2..-1].to_i(16)
+  elsif value =~ /^0/
+    value.to_i(8)
+  else
+    value.to_i
+  end
+end
+
+def symbol_value(value)
+  value[1..-1].to_sym
+end
+
+def string_value(value)
+  quote = value[0..0]
+  if quote == "'"
+    value[1..-2].gsub("\\\\", "\\").gsub("\\'", "'")
+  elsif quote == '"'
+    value[1..-2].
+      gsub("\\\\", "\\").
+      gsub('\\"', '"').
+      gsub("\\n", "\n").
+      gsub("\\t", "\t").
+      gsub("\\r", "\r").
+      gsub("\\f", "\f").
+      gsub("\\v", "\v").
+      gsub("\\a", "\a").
+      gsub("\\e", "\e").
+      gsub("\\b", "\b").
+      gsub("\\s", "\s").
+      gsub(/\\([0-7]{1,3})/) { $1.to_i(8).chr }.
+      gsub(/\\x([0-9a-fA-F]{1,2})/) { $1.to_i(16).chr }
+  else
+    raise "Unknown quote: #{quote.inspect}."
+  end
+end
+
+REGEXP_OPTIONS = {
+  'i' => Regexp::IGNORECASE,
+  'm' => Regexp::MULTILINE,
+  'x' => Regexp::EXTENDED
+}
+
+def regexp_value(value)
+  /\A\/(.*)\/([imx]*)\z/ =~ value
+  pattern, options = $1, $2
+
+  Regexp.new(pattern, options.chars.map { |ch| REGEXP_OPTIONS[ch] }.inject(:|))
+end
+
+# "^" needs to be here because if it were among operators recognized by
+# METHOD_NAME, "^=" would be recognized as two tokens.
+SIMPLE_TOKENS = [
+  "|",
+  "<",
+  ">",
+  ",",
+  "=",
+  "^=",
+  "^",
+  "$=",
+  "[",
+  "]",
+  "*=",
+  "*",
+  "+",
+  "?",
+  "{",
+  "}"
+]
+
+COMPLEX_TOKENS = [
+  [:NIL,   /^nil/],
+  [:TRUE,  /^true/],
+  [:FALSE, /^false/],
+  # INTEGER needs to be before METHOD_NAME, otherwise e.g. "+1" would be
+  # recognized as two tokens.
+  [
+    :INTEGER,
+    /^
+      [+-]?                               # sign
+      (
+        0[bB][01]+(_[01]+)*               # binary (prefixed)
+        |
+        0[oO][0-7]+(_[0-7]+)*             # octal (prefixed)
+        |
+        0[dD]\d+(_\d+)*                   # decimal (prefixed)
+        |
+        0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)* # hexadecimal (prefixed)
+        |
+        0[0-7]*(_[0-7]+)*                 # octal (unprefixed)
+        |
+        [1-9]\d*(_\d+)*                   # decimal (unprefixed)
+      )
+    /x
+  ],
+  [
+    :SYMBOL,
+    /^
+      :
+      (
+        # class name
+        [A-Z][a-zA-Z0-9_]*
+        |
+        # regular method name
+        [a-z_][a-zA-Z0-9_]*[?!=]?
+        |
+        # instance variable name
+        @[a-zA-Z_][a-zA-Z0-9_]*
+        |
+        # class variable name
+        @@[a-zA-Z_][a-zA-Z0-9_]*
+        |
+        # operator (sorted by length, then alphabetically)
+        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&*+\-\/<>^`|~])
+      )
+    /x
+  ],
+  [
+    :STRING,
+    /^
+      (
+        '                 # sinqle-quoted string
+          (
+            \\[\\']           # escape
+            |
+            [^']              # regular character
+          )*
+        '
+        |
+        "                 # double-quoted string
+          (
+            \\                # escape
+            (
+              [\\"ntrfvaebs]    # one-character escape
+              |
+              [0-7]{1,3}        # octal number escape
+              |
+              x[0-9a-fA-F]{1,2} # hexadecimal number escape
+            )
+            |
+            [^"]              # regular character
+          )*
+        "
+      )
+    /x
+  ],
+  [
+    :REGEXP,
+    /^
+      \/
+        (
+          \\                                          # escape
+          (
+            [\\\/ntrfvaebs\(\)\[\]\{\}\-\.\?\*\+\|\^\$] # one-character escape
+            |
+            [0-7]{2,3}                                  # octal number escape
+            |
+            x[0-9a-fA-F]{1,2}                           # hexadecimal number escape
+          )
+          |
+          [^\/]                                       # regular character
+        )*
+      \/
+      [imx]*
+    /x
+  ],
+  # ANY, EVEN and ODD need to be before METHOD_NAME, otherwise they would be
+  # recognized as method names.
+  [:ANY,  /^any/],
+  [:EVEN, /^even/],
+  [:ODD,  /^odd/],
+  # We exclude "*", "+", "<", ">", "^" and "|" from method names since they are
+  # lexed as simple tokens. This is because they have also other meanings in
+  # Machette patterns beside Ruby method names.
+  [
+    :METHOD_NAME,
+    /^
+      (
+        # regular name
+        [a-z_][a-zA-Z0-9_]*[?!=]?
+        |
+        # operator (sorted by length, then alphabetically)
+        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&\-\/`~])
+      )
+    /x
+  ],
+  [:CLASS_NAME, /^[A-Z][a-zA-Z0-9_]*/]
+]
+
+def next_token
+  skip_whitespace
+
+  return false if remaining_input.empty?
+
+  # Complex tokens need to be before simple tokens, otherwise e.g. "<<" would be
+  # recognized as two tokens.
+
+  COMPLEX_TOKENS.each do |type, regexp|
+    if remaining_input =~ regexp
+      @pos += $&.length
+      return [type, $&]
+    end
+  end
+
+  SIMPLE_TOKENS.each do |token|
+    if remaining_input[0...token.length] == token
+      @pos += token.length
+      return [token, token]
+    end
+  end
+
+  raise SyntaxError, "Unexpected character: #{remaining_input[0..0].inspect}."
+end
+
+def skip_whitespace
+  if remaining_input =~ /\A^[ \t\r\n]+/
+    @pos += $&.length
+  end
+end
+
+def remaining_input
+  @input[@pos..-1]
+end
+
+def on_error(error_token_id, error_value, value_stack)
+  raise SyntaxError, "Unexpected token: #{error_value.inspect}."
+end