author     Nobuyoshi Nakada <nobu@ruby-lang.org>     2022-10-05 20:07:02 +0900
committer  git <svn-admin@ruby-lang.org>             2022-11-27 17:24:38 +0000
commit     21977b95e238da08ee2dc50fe5a908f11feb130c (patch)
tree       eb6a2ef089233213d412296475cc042286ae7c72
parent     29862ce27303107935de6df4f85dd7fd610f19e2 (diff)
download   ruby-21977b95e238da08ee2dc50fe5a908f11feb130c.tar.gz
[ruby/rdoc] Refactor `RDoc::Markup::Parser#tokenize`
Make verbatims text or newline only, and simplify `build_verbatim`.

https://github.com/ruby/rdoc/commit/41ceae93b3
-rw-r--r--  lib/rdoc/markup/parser.rb             214
-rw-r--r--  test/rdoc/test_rdoc_markup_parser.rb   21
2 files changed, 118 insertions, 117 deletions
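
The practical effect of the change: lines inside an indented (verbatim) block are now emitted as plain :TEXT/:NEWLINE tokens, so markup-looking lines such as headers are no longer split into :HEADER plus :TEXT there. A minimal sketch of the new behaviour, using the class-level RDoc::Markup::Parser.tokenize helper that the test suite calls; the input string is reconstructed from the updated test expectations further down, so treat it as illustrative rather than taken verbatim from the commit:

  require 'rdoc'

  str = "Example heading:\n\n   === heading three\n"
  pp RDoc::Markup::Parser.tokenize(str)
  # Per the updated test below, this now yields:
  #   [[:TEXT,    'Example heading:',   0, 0],
  #    [:NEWLINE, "\n",                16, 0],
  #    [:NEWLINE, "\n",                 0, 1],
  #    [:TEXT,    '=== heading three',  3, 2],
  #    [:NEWLINE, "\n",                20, 2]]
  # Before this commit, the indented line produced [:HEADER, 3, 3, 2]
  # followed by [:TEXT, 'heading three', 7, 2].
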
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb
index 0029df7e65..a450774cf5 100644
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@@ -272,44 +272,11 @@ class RDoc::Markup::Parser
end
case type
- when :HEADER then
- line << '=' * data
- _, _, peek_column, = peek_token
- peek_column ||= column + data
- indent = peek_column - column - data
- line << ' ' * indent
- when :RULE then
- width = 2 + data
- line << '-' * width
- _, _, peek_column, = peek_token
- peek_column ||= column + width
- indent = peek_column - column - width
- line << ' ' * indent
when :BREAK, :TEXT then
line << data
- when :BLOCKQUOTE then
- line << '>>>'
- peek_type, _, peek_column = peek_token
- if peek_type != :NEWLINE and peek_column
- line << ' ' * (peek_column - column - 3)
- end
- else # *LIST_TOKENS
- list_marker = case type
- when :BULLET then data
- when :LABEL then "[#{data}]"
- when :NOTE then "#{data}::"
- else # :LALPHA, :NUMBER, :UALPHA
- "#{data}."
- end
- line << list_marker
- peek_type, _, peek_column = peek_token
- unless peek_type == :NEWLINE then
- peek_column ||= column + list_marker.length
- indent = peek_column - column - list_marker.length
- line << ' ' * indent
- end
+ else
+ raise TypeError, "unexpected token under verbatim: #{type}"
end
-
end
verbatim << line << "\n" unless line.empty?
@@ -481,11 +448,37 @@ class RDoc::Markup::Parser
##
# Turns text +input+ into a stream of tokens
- def tokenize input
+ def tokenize(input)
setup_scanner input
+ margin = @s.pos[0]
+ tokenize_indented(margin)
+ tokenize_input(margin)
+ end
+
+ def newline!(pos = nil)
+ if pos or (@s.scan(/ *(?=\r?\n)/) and pos = @s.pos and @s.scan(/\r?\n/))
+ @tokens << [:NEWLINE, @s.matched, *pos]
+ @s.newline!
+ end
+ end
- until @s.eos? do
+ def tokenize_indented(column)
+ indent = / {#{column+1},}(?=\S)| *(?=\r?\n)/
+ while @s.scan(indent)
pos = @s.pos
+ if @s.scan(/(.+)(?=\r?\n)?/)
+ @tokens << [:TEXT, @s.matched, *pos]
+ end
+ newline! or break
+ end
+ end
+
+ def tokenize_input(margin)
+ column = 0
+
+ until @s.eos?
+ pos = @s.pos
+ break if pos[0] < (margin ||= pos[0])
# leading spaces will be reflected by the column of the next token
# the only thing we loose are trailing spaces at the end of the file
@@ -494,75 +487,84 @@ class RDoc::Markup::Parser
# note: after BULLET, LABEL, etc.,
# indent will be the column of the next non-newline token
- @tokens << case
- # [CR]LF => :NEWLINE
- when @s.scan(/\r?\n/) then
- token = [:NEWLINE, @s.matched, *pos]
- @s.newline!
- token
- # === text => :HEADER then :TEXT
- when @s.scan(/(=+)(\s*)/) then
- level = @s[1].length
- header = [:HEADER, level, *pos]
-
- if @s[2] =~ /^\r?\n/ then
- @s.unscan(@s[2])
- header
- else
- pos = @s.pos
- @s.scan(/.*/)
- @tokens << header
- [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
- end
- # --- (at least 3) and nothing else on the line => :RULE
- when @s.scan(/(-{3,}) *\r?$/) then
- [:RULE, @s[1].length - 2, *pos]
- # * or - followed by white space and text => :BULLET
- when @s.scan(/([*-]) +(\S)/) then
- @s.unscan(@s[2])
- [:BULLET, @s[1], *pos]
- # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
- when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
- # FIXME if tab(s), the column will be wrong
- # either support tabs everywhere by first expanding them to
- # spaces, or assume that they will have been replaced
- # before (and provide a check for that at least in debug
- # mode)
- list_label = @s[1]
- @s.unscan(@s[2])
- list_type =
- case list_label
- when /[a-z]/ then :LALPHA
- when /[A-Z]/ then :UALPHA
- when /\d/ then :NUMBER
- else
- raise ParseError, "BUG token #{list_label}"
- end
- [list_type, list_label, *pos]
- # [text] followed by spaces or end of line => :LABEL
- when @s.scan(/\[(.*?)\]( +|\r?$)/) then
- [:LABEL, @s[1], *pos]
- # text:: followed by spaces or end of line => :NOTE
- when @s.scan(/(.*?)::( +|\r?$)/) then
- [:NOTE, @s[1], *pos]
- # >>> followed by end of line => :BLOCKQUOTE
- when @s.scan(/>>> *(\w+)?$/) then
- if word = @s[1]
- @s.unscan(word)
- end
- [:BLOCKQUOTE, word, *pos]
- # anything else: :TEXT
- else
- @s.scan(/(.*?)( )?\r?$/)
- token = [:TEXT, @s[1], *pos]
-
- if @s[2] then
- @tokens << token
- [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
- else
- token
- end
- end
+ case
+ # [CR]LF => :NEWLINE
+ when @s.scan(/\r?\n/)
+ newline!(pos)
+ next
+
+ # === text => :HEADER then :TEXT
+ when @s.scan(/(=+)(\s*)/)
+ level = @s[1].length
+ header = [:HEADER, level, *pos]
+
+ if @s[2] =~ /^\r?\n/
+ @s.unscan(@s[2])
+ @tokens << header
+ else
+ pos = @s.pos
+ @s.scan(/.*/)
+ @tokens << header
+ @tokens << [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
+ end
+
+ # --- (at least 3) and nothing else on the line => :RULE
+ when @s.scan(/(-{3,}) *\r?$/)
+ @tokens << [:RULE, @s[1].length - 2, *pos]
+
+ # * or - followed by white space and text => :BULLET
+ when @s.scan(/([*-]) +(?=\S)/)
+ @tokens << [:BULLET, @s[1], *pos]
+ tokenize_input(nil)
+
+ # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+ when @s.scan(/([a-z]|\d+)\. +(?=\S)/i)
+ # FIXME if tab(s), the column will be wrong
+ # either support tabs everywhere by first expanding them to
+ # spaces, or assume that they will have been replaced
+ # before (and provide a check for that at least in debug
+ # mode)
+ list_label = @s[1]
+ list_type =
+ case list_label
+ when /[a-z]/ then :LALPHA
+ when /[A-Z]/ then :UALPHA
+ when /\d/ then :NUMBER
+ else
+ raise ParseError, "BUG token #{list_label}"
+ end
+ @tokens << [list_type, list_label, *pos]
+ tokenize_input(nil)
+
+ # [text] followed by spaces or end of line => :LABEL
+ when @s.scan(/\[(.*?)\]( +|\r?$)/)
+ @tokens << [:LABEL, @s[1], *pos]
+ tokenize_input(nil)
+
+ # text:: followed by spaces or end of line => :NOTE
+ when @s.scan(/(.*?)::( +|\r?$)/)
+ @tokens << [:NOTE, @s[1], *pos]
+ tokenize_input(nil)
+
+ # >>> followed by end of line => :BLOCKQUOTE
+ when @s.scan(/>>> *(\w+)?\r?$/)
+ @tokens << [:BLOCKQUOTE, @s[1], *pos]
+ newline!
+ tokenize_input(nil)
+
+ # anything else: :TEXT
+ else
+ column = pos[0]
+ @s.scan(/(.*?)( )?\r?$/)
+ @tokens << [:TEXT, @s[1], *pos]
+
+ if @s[2]
+ @tokens << [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
+ end
+ if newline!
+ tokenize_indented(column)
+ end
+ end
end
self
diff --git a/test/rdoc/test_rdoc_markup_parser.rb b/test/rdoc/test_rdoc_markup_parser.rb
index 6fccf09612..26c4cdc7db 100644
--- a/test/rdoc/test_rdoc_markup_parser.rb
+++ b/test/rdoc/test_rdoc_markup_parser.rb
@@ -1591,8 +1591,7 @@ Example heading:
[:TEXT, 'Example heading:', 0, 0],
[:NEWLINE, "\n", 16, 0],
[:NEWLINE, "\n", 0, 1],
- [:HEADER, 3, 3, 2],
- [:TEXT, 'heading three', 7, 2],
+ [:TEXT, '=== heading three', 3, 2],
[:NEWLINE, "\n", 20, 2],
]
@@ -1608,7 +1607,7 @@ Example heading:
expected = [
[:TEXT, 'Verbatim section here that is double-underlined', 2, 0],
[:NEWLINE, "\n", 49, 0],
- [:HEADER, 47, 2, 1],
+ [:TEXT, '='*47, 2, 1],
[:NEWLINE, "\n", 49, 1],
]
@@ -1624,14 +1623,14 @@ Example heading:
STR
expected = [
- [:TEXT, 'A', 2, 0],
- [:NEWLINE, "\n", 3, 0],
- [:TEXT, 'b', 4, 1],
- [:NEWLINE, "\n", 5, 1],
- [:HEADER, 47, 2, 2],
- [:NEWLINE, "\n", 49, 2],
- [:TEXT, 'c', 4, 3],
- [:NEWLINE, "\n", 5, 3],
+ [:TEXT, 'A', 2, 0],
+ [:NEWLINE, "\n", 3, 0],
+ [:TEXT, 'b', 4, 1],
+ [:NEWLINE, "\n", 5, 1],
+ [:TEXT, '='*47, 2, 2],
+ [:NEWLINE, "\n", 49, 2],
+ [:TEXT, 'c', 4, 3],
+ [:NEWLINE, "\n", 5, 3],
]
assert_equal expected, @RMP.tokenize(str)
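
Similarly, the double-underline test above shows that an indented block whose second line is a long run of '=' characters now stays plain text. A minimal sketch, again via RDoc::Markup::Parser.tokenize; the input is reconstructed from the token columns in the updated expectations, so the exact string is an assumption:

  require 'rdoc'

  str = "  Verbatim section here that is double-underlined\n" \
        "  #{'=' * 47}\n"
  pp RDoc::Markup::Parser.tokenize(str)
  # Per the updated test above:
  #   [[:TEXT,    'Verbatim section here that is double-underlined',  2, 0],
  #    [:NEWLINE, "\n",                                              49, 0],
  #    [:TEXT,    '=' * 47,                                           2, 1],
  #    [:NEWLINE, "\n",                                              49, 1]]
  # Before this commit, the underline tokenized as [:HEADER, 47, 2, 1].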