diff options
Diffstat (limited to 'lib/rdoc/markup/parser.rb')
-rw-r--r-- | lib/rdoc/markup/parser.rb | 189 |
1 files changed, 119 insertions, 70 deletions
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb index c18ce821fb..ca384d0639 100644 --- a/lib/rdoc/markup/parser.rb +++ b/lib/rdoc/markup/parser.rb @@ -1,5 +1,4 @@ require 'strscan' -require 'rdoc/text' ## # A recursive-descent parser for RDoc markup. @@ -52,7 +51,9 @@ class RDoc::Markup::Parser attr_reader :tokens ## - # Parses +str+ into a Document + # Parses +str+ into a Document. + # + # Use RDoc::Markup#parse instead of this method. def self.parse str parser = new @@ -74,12 +75,15 @@ class RDoc::Markup::Parser # Creates a new Parser. See also ::parse def initialize - @tokens = [] - @current_token = nil - @debug = false - - @line = 0 - @line_pos = 0 + @binary_input = nil + @current_token = nil + @debug = false + @have_encoding = Object.const_defined? :Encoding + @input_encoding = nil + @line = 0 + @line_pos = 0 + @s = nil + @tokens = [] end ## @@ -107,13 +111,13 @@ class RDoc::Markup::Parser p :list_start => margin if @debug list = RDoc::Markup::List.new + label = nil until @tokens.empty? do type, data, column, = get case type - when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then - + when *LIST_TOKENS then if column < margin || (list.type && list.type != type) then unget break @@ -124,6 +128,8 @@ class RDoc::Markup::Parser case type when :NOTE, :LABEL then + label = [] unless label + if peek_type == :NEWLINE then # description not on the same line as LABEL/NOTE # skip the trailing newline & any blank lines below @@ -146,32 +152,35 @@ class RDoc::Markup::Parser # In all cases, we have an empty description. # In the last case only, we continue. if peek_type.nil? || column < margin then - empty = 1 + empty = true elsif column == margin then case peek_type when type - empty = 2 # continue + empty = :continue when *LIST_TOKENS - empty = 1 + empty = true else - empty = 0 + empty = false end else - empty = 0 + empty = false end - if empty > 0 then - item = RDoc::Markup::ListItem.new(data) - item << RDoc::Markup::BlankLine.new - list << item - break if empty == 1 - next + if empty then + label << data + next if empty == :continue + break end end else data = nil end + if label then + data = label << data + label = nil + end + list_item = RDoc::Markup::ListItem.new data parse list_item, column list << list_item @@ -184,7 +193,13 @@ class RDoc::Markup::Parser p :list_end => margin if @debug - return nil if list.empty? + if list.empty? then + return nil unless label + return nil unless [:LABEL, :NOTE].include? list.type + + list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new + list << list_item + end list end @@ -200,15 +215,20 @@ class RDoc::Markup::Parser until @tokens.empty? do type, data, column, = get - if type == :TEXT && column == margin then + if type == :TEXT and column == margin then paragraph << data - skip :NEWLINE + + break if peek_token.first == :BREAK + + data << ' ' if skip :NEWLINE else unget break end end + paragraph.parts.last.sub!(/ \z/, '') # cleanup + p :paragraph_end => margin if @debug paragraph @@ -267,7 +287,7 @@ class RDoc::Markup::Parser peek_column ||= column + width indent = peek_column - column - width line << ' ' * indent - when :TEXT then + when :BREAK, :TEXT then line << data else # *LIST_TOKENS list_marker = case type @@ -298,6 +318,19 @@ class RDoc::Markup::Parser end ## + # The character offset for the input string at the given +byte_offset+ + + def char_pos byte_offset + if @have_encoding then + matched = @binary_input[0, byte_offset] + matched.force_encoding @input_encoding + matched.length + else + byte_offset + end + end + + ## # Pulls the next token from the stream. def get @@ -321,7 +354,12 @@ class RDoc::Markup::Parser until @tokens.empty? do type, data, column, = get - if type == :NEWLINE then + case type + when :BREAK then + parent << RDoc::Markup::BlankLine.new + skip :NEWLINE, false + next + when :NEWLINE then # trailing newlines are skipped below, so this is a blank line parent << RDoc::Markup::BlankLine.new skip :NEWLINE, false @@ -373,6 +411,21 @@ class RDoc::Markup::Parser end ## + # Creates the StringScanner + + def setup_scanner input + @line = 0 + @line_pos = 0 + + if @have_encoding then + @input_encoding = input.encoding + @binary_input = input.dup.force_encoding Encoding::BINARY + end + + @s = StringScanner.new input + end + + ## # Skips the next token if its type is +token_type+. # # Optionally raises an error if the next token is not of the expected type. @@ -389,58 +442,55 @@ class RDoc::Markup::Parser # Turns text +input+ into a stream of tokens def tokenize input - s = StringScanner.new input + setup_scanner input - @line = 0 - @line_pos = 0 - - until s.eos? do - pos = s.pos + until @s.eos? do + pos = @s.pos # leading spaces will be reflected by the column of the next token # the only thing we loose are trailing spaces at the end of the file - next if s.scan(/ +/) + next if @s.scan(/ +/) # note: after BULLET, LABEL, etc., # indent will be the column of the next non-newline token @tokens << case # [CR]LF => :NEWLINE - when s.scan(/\r?\n/) then - token = [:NEWLINE, s.matched, *token_pos(pos)] - @line_pos = s.pos + when @s.scan(/\r?\n/) then + token = [:NEWLINE, @s.matched, *token_pos(pos)] + @line_pos = char_pos @s.pos @line += 1 token # === text => :HEADER then :TEXT - when s.scan(/(=+)(\s*)/) then - level = s[1].length + when @s.scan(/(=+)(\s*)/) then + level = @s[1].length header = [:HEADER, level, *token_pos(pos)] - if s[2] =~ /^\r?\n/ then - s.pos -= s[2].length + if @s[2] =~ /^\r?\n/ then + @s.pos -= @s[2].length header else - pos = s.pos - s.scan(/.*/) + pos = @s.pos + @s.scan(/.*/) @tokens << header - [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)] + [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)] end # --- (at least 3) and nothing else on the line => :RULE - when s.scan(/(-{3,}) *$/) then - [:RULE, s[1].length - 2, *token_pos(pos)] + when @s.scan(/(-{3,}) *\r?$/) then + [:RULE, @s[1].length - 2, *token_pos(pos)] # * or - followed by white space and text => :BULLET - when s.scan(/([*-]) +(\S)/) then - s.pos -= s[2].bytesize # unget \S - [:BULLET, s[1], *token_pos(pos)] + when @s.scan(/([*-]) +(\S)/) then + @s.pos -= @s[2].bytesize # unget \S + [:BULLET, @s[1], *token_pos(pos)] # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER - when s.scan(/([a-z]|\d+)\. +(\S)/i) then + when @s.scan(/([a-z]|\d+)\. +(\S)/i) then # FIXME if tab(s), the column will be wrong # either support tabs everywhere by first expanding them to # spaces, or assume that they will have been replaced # before (and provide a check for that at least in debug # mode) - list_label = s[1] - s.pos -= s[2].bytesize # unget \S + list_label = @s[1] + @s.pos -= @s[2].bytesize # unget \S list_type = case list_label when /[a-z]/ then :LALPHA @@ -451,14 +501,21 @@ class RDoc::Markup::Parser end [list_type, list_label, *token_pos(pos)] # [text] followed by spaces or end of line => :LABEL - when s.scan(/\[(.*?)\]( +|$)/) then - [:LABEL, s[1], *token_pos(pos)] + when @s.scan(/\[(.*?)\]( +|\r?$)/) then + [:LABEL, @s[1], *token_pos(pos)] # text:: followed by spaces or end of line => :NOTE - when s.scan(/(.*?)::( +|$)/) then - [:NOTE, s[1], *token_pos(pos)] + when @s.scan(/(.*?)::( +|\r?$)/) then + [:NOTE, @s[1], *token_pos(pos)] # anything else: :TEXT - else s.scan(/.*/) - [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)] + else @s.scan(/(.*?)( )?\r?$/) + token = [:TEXT, @s[1], *token_pos(pos)] + + if @s[2] then + @tokens << token + [:BREAK, @s[2], *token_pos(pos + @s[1].length)] + else + token + end end end @@ -466,9 +523,12 @@ class RDoc::Markup::Parser end ## - # Calculates the column and line of the current token based on +offset+. + # Calculates the column (by character) and line of the current token from + # +scanner+ based on +byte_offset+. + + def token_pos byte_offset + offset = char_pos byte_offset - def token_pos offset [offset - @line_pos, @line] end @@ -484,14 +544,3 @@ class RDoc::Markup::Parser end -require 'rdoc/markup/blank_line' -require 'rdoc/markup/document' -require 'rdoc/markup/heading' -require 'rdoc/markup/list' -require 'rdoc/markup/list_item' -require 'rdoc/markup/raw' -require 'rdoc/markup/paragraph' -require 'rdoc/markup/indented_paragraph' -require 'rdoc/markup/rule' -require 'rdoc/markup/verbatim' - |