diff options
Diffstat (limited to 'test/racc/assets/machete.y')
-rw-r--r-- | test/racc/assets/machete.y | 423 |
1 files changed, 423 insertions, 0 deletions
diff --git a/test/racc/assets/machete.y b/test/racc/assets/machete.y new file mode 100644 index 0000000000..ea92d47a69 --- /dev/null +++ b/test/racc/assets/machete.y @@ -0,0 +1,423 @@ +# Copyright (c) 2011 SUSE +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +class Machete::Parser + +token NIL +token TRUE +token FALSE +token INTEGER +token SYMBOL +token STRING +token REGEXP +token ANY +token EVEN +token ODD +token METHOD_NAME +token CLASS_NAME + +start expression + +rule + +expression : primary + | expression "|" primary { + result = if val[0].is_a?(ChoiceMatcher) + ChoiceMatcher.new(val[0].alternatives << val[2]) + else + ChoiceMatcher.new([val[0], val[2]]) + end + } + +primary : node + | array + | literal + | any + +node : CLASS_NAME { + result = NodeMatcher.new(val[0].to_sym) + } + | CLASS_NAME "<" attrs ">" { + result = NodeMatcher.new(val[0].to_sym, val[2]) + } + +attrs : attr + | attrs "," attr { result = val[0].merge(val[2]) } + +attr : method_name "=" expression { result = { val[0].to_sym => val[2] } } + | method_name "^=" SYMBOL { + result = { + val[0].to_sym => SymbolRegexpMatcher.new( + Regexp.new("^" + Regexp.escape(symbol_value(val[2]).to_s)) + ) + } + } + | method_name "$=" SYMBOL { + result = { + val[0].to_sym => SymbolRegexpMatcher.new( + Regexp.new(Regexp.escape(symbol_value(val[2]).to_s) + "$") + ) + } + } + | method_name "*=" SYMBOL { + result = { + val[0].to_sym => SymbolRegexpMatcher.new( + Regexp.new(Regexp.escape(symbol_value(val[2]).to_s)) + ) + } + } + | method_name "^=" STRING { + result = { + val[0].to_sym => StringRegexpMatcher.new( + Regexp.new("^" + Regexp.escape(string_value(val[2]))) + ) + } + } + | method_name "$=" STRING { + result = { + val[0].to_sym => StringRegexpMatcher.new( + Regexp.new(Regexp.escape(string_value(val[2])) + "$") + ) + } + } + | method_name "*=" STRING { + result = { + val[0].to_sym => StringRegexpMatcher.new( + Regexp.new(Regexp.escape(string_value(val[2]))) + ) + } + } + | method_name "*=" REGEXP { + result = { + val[0].to_sym => IndifferentRegexpMatcher.new( + Regexp.new(regexp_value(val[2])) + ) + } + } + +# Hack to overcome the fact that some tokens will lex as simple tokens, not +# METHOD_NAME tokens, and that "reserved words" will lex as separate kinds of +# tokens. +method_name : METHOD_NAME + | NIL + | TRUE + | FALSE + | ANY + | EVEN + | ODD + | "*" + | "+" + | "<" + | ">" + | "^" + | "|" + +array : "[" items_opt "]" { result = ArrayMatcher.new(val[1]) } + +items_opt : /* empty */ { result = [] } + | items + +items : item { result = [val[0]] } + | items "," item { result = val[0] << val[2] } + +item : expression + | expression quantifier { result = Quantifier.new(val[0], *val[1]) } + +quantifier : "*" { result = [0, nil, 1] } + | "+" { result = [1, nil, 1] } + | "?" { result = [0, 1, 1] } + | "{" INTEGER "}" { + result = [integer_value(val[1]), integer_value(val[1]), 1] + } + | "{" INTEGER "," "}" { + result = [integer_value(val[1]), nil, 1] + } + | "{" "," INTEGER "}" { + result = [0, integer_value(val[2]), 1] + } + | "{" INTEGER "," INTEGER "}" { + result = [integer_value(val[1]), integer_value(val[3]), 1] + } + | "{" EVEN "}" { result = [0, nil, 2] } + | "{" ODD "}" { result = [1, nil, 2] } + +literal : NIL { result = LiteralMatcher.new(nil) } + | TRUE { result = LiteralMatcher.new(true) } + | FALSE { result = LiteralMatcher.new(false) } + | INTEGER { result = LiteralMatcher.new(integer_value(val[0])) } + | SYMBOL { result = LiteralMatcher.new(symbol_value(val[0])) } + | STRING { result = LiteralMatcher.new(string_value(val[0])) } + | REGEXP { result = LiteralMatcher.new(regexp_value(val[0])) } + +any : ANY { result = AnyMatcher.new } + +---- inner + +include Matchers + +class SyntaxError < StandardError; end + +def parse(input) + @input = input + @pos = 0 + + do_parse +end + +private + +def integer_value(value) + if value =~ /^0[bB]/ + value[2..-1].to_i(2) + elsif value =~ /^0[oO]/ + value[2..-1].to_i(8) + elsif value =~ /^0[dD]/ + value[2..-1].to_i(10) + elsif value =~ /^0[xX]/ + value[2..-1].to_i(16) + elsif value =~ /^0/ + value.to_i(8) + else + value.to_i + end +end + +def symbol_value(value) + value[1..-1].to_sym +end + +def string_value(value) + quote = value[0..0] + if quote == "'" + value[1..-2].gsub("\\\\", "\\").gsub("\\'", "'") + elsif quote == '"' + value[1..-2]. + gsub("\\\\", "\\"). + gsub('\\"', '"'). + gsub("\\n", "\n"). + gsub("\\t", "\t"). + gsub("\\r", "\r"). + gsub("\\f", "\f"). + gsub("\\v", "\v"). + gsub("\\a", "\a"). + gsub("\\e", "\e"). + gsub("\\b", "\b"). + gsub("\\s", "\s"). + gsub(/\\([0-7]{1,3})/) { $1.to_i(8).chr }. + gsub(/\\x([0-9a-fA-F]{1,2})/) { $1.to_i(16).chr } + else + raise "Unknown quote: #{quote.inspect}." + end +end + +REGEXP_OPTIONS = { + 'i' => Regexp::IGNORECASE, + 'm' => Regexp::MULTILINE, + 'x' => Regexp::EXTENDED +} + +def regexp_value(value) + /\A\/(.*)\/([imx]*)\z/ =~ value + pattern, options = $1, $2 + + Regexp.new(pattern, options.chars.map { |ch| REGEXP_OPTIONS[ch] }.inject(:|)) +end + +# "^" needs to be here because if it were among operators recognized by +# METHOD_NAME, "^=" would be recognized as two tokens. +SIMPLE_TOKENS = [ + "|", + "<", + ">", + ",", + "=", + "^=", + "^", + "$=", + "[", + "]", + "*=", + "*", + "+", + "?", + "{", + "}" +] + +COMPLEX_TOKENS = [ + [:NIL, /^nil/], + [:TRUE, /^true/], + [:FALSE, /^false/], + # INTEGER needs to be before METHOD_NAME, otherwise e.g. "+1" would be + # recognized as two tokens. + [ + :INTEGER, + /^ + [+-]? # sign + ( + 0[bB][01]+(_[01]+)* # binary (prefixed) + | + 0[oO][0-7]+(_[0-7]+)* # octal (prefixed) + | + 0[dD]\d+(_\d+)* # decimal (prefixed) + | + 0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)* # hexadecimal (prefixed) + | + 0[0-7]*(_[0-7]+)* # octal (unprefixed) + | + [1-9]\d*(_\d+)* # decimal (unprefixed) + ) + /x + ], + [ + :SYMBOL, + /^ + : + ( + # class name + [A-Z][a-zA-Z0-9_]* + | + # regular method name + [a-z_][a-zA-Z0-9_]*[?!=]? + | + # instance variable name + @[a-zA-Z_][a-zA-Z0-9_]* + | + # class variable name + @@[a-zA-Z_][a-zA-Z0-9_]* + | + # operator (sorted by length, then alphabetically) + (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&*+\-\/<>^`|~]) + ) + /x + ], + [ + :STRING, + /^ + ( + ' # sinqle-quoted string + ( + \\[\\'] # escape + | + [^'] # regular character + )* + ' + | + " # double-quoted string + ( + \\ # escape + ( + [\\"ntrfvaebs] # one-character escape + | + [0-7]{1,3} # octal number escape + | + x[0-9a-fA-F]{1,2} # hexadecimal number escape + ) + | + [^"] # regular character + )* + " + ) + /x + ], + [ + :REGEXP, + /^ + \/ + ( + \\ # escape + ( + [\\\/ntrfvaebs\(\)\[\]\{\}\-\.\?\*\+\|\^\$] # one-character escape + | + [0-7]{2,3} # octal number escape + | + x[0-9a-fA-F]{1,2} # hexadecimal number escape + ) + | + [^\/] # regular character + )* + \/ + [imx]* + /x + ], + # ANY, EVEN and ODD need to be before METHOD_NAME, otherwise they would be + # recognized as method names. + [:ANY, /^any/], + [:EVEN, /^even/], + [:ODD, /^odd/], + # We exclude "*", "+", "<", ">", "^" and "|" from method names since they are + # lexed as simple tokens. This is because they have also other meanings in + # Machette patterns beside Ruby method names. + [ + :METHOD_NAME, + /^ + ( + # regular name + [a-z_][a-zA-Z0-9_]*[?!=]? + | + # operator (sorted by length, then alphabetically) + (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&\-\/`~]) + ) + /x + ], + [:CLASS_NAME, /^[A-Z][a-zA-Z0-9_]*/] +] + +def next_token + skip_whitespace + + return false if remaining_input.empty? + + # Complex tokens need to be before simple tokens, otherwise e.g. "<<" would be + # recognized as two tokens. + + COMPLEX_TOKENS.each do |type, regexp| + if remaining_input =~ regexp + @pos += $&.length + return [type, $&] + end + end + + SIMPLE_TOKENS.each do |token| + if remaining_input[0...token.length] == token + @pos += token.length + return [token, token] + end + end + + raise SyntaxError, "Unexpected character: #{remaining_input[0..0].inspect}." +end + +def skip_whitespace + if remaining_input =~ /\A^[ \t\r\n]+/ + @pos += $&.length + end +end + +def remaining_input + @input[@pos..-1] +end + +def on_error(error_token_id, error_value, value_stack) + raise SyntaxError, "Unexpected token: #{error_value.inspect}." +end |