diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-06-10 01:31:01 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-06-10 01:31:01 +0000 |
commit | ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d (patch) | |
tree | d3e1f95a5acf262a9dd46e9663b7034bb285b406 /lib/rexml/source.rb | |
parent | ca02190d8887ecd852e4e3f18f3a3ea91e9c6f7a (diff) | |
download | ruby-ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d.tar.gz |
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/source.rb')
-rw-r--r-- | lib/rexml/source.rb | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb new file mode 100644 index 0000000000..8c175785b7 --- /dev/null +++ b/lib/rexml/source.rb @@ -0,0 +1,191 @@ +require 'rexml/encoding' + +module REXML + # Generates Source-s. USE THIS CLASS. + class SourceFactory + # Generates a Source object + # @param arg Either a String, or an IO + # @return a Source, or nil if a bad argument was given + def SourceFactory::create_from arg#, slurp=true + if arg.kind_of? String + source = Source.new(arg) + elsif arg.kind_of? IO + source = IOSource.new(arg) + end + source + end + end + + # A Source can be searched for patterns, and wraps buffers and other + # objects and provides consumption of text + class Source + include Encoding + # The current buffer (what we're going to read next) + attr_reader :buffer + # The line number of the last consumed text + attr_reader :line + attr_reader :encoding + + # Constructor + # @param arg must be a String, and should be a valid XML document + def initialize arg + @orig = @buffer = arg + self.encoding = check_encoding( @buffer ) + #@buffer = decode(@buffer) unless @encoding == UTF_8 + @line = 0 + end + + # Inherited from Encoding + # Overridden to support optimized en/decoding + def encoding=(enc) + super + eval <<-EOL + alias :encode :to_#{encoding.tr('-', '_').downcase} + alias :decode :from_#{encoding.tr('-', '_').downcase} + EOL + @line_break = encode( '>' ) + if enc != UTF_8 + @buffer = decode(@buffer) + @to_utf = true + else + @to_utf = false + end + end + + # Scans the source for a given pattern. Note, that this is not your + # usual scan() method. For one thing, the pattern argument has some + # requirements; for another, the source can be consumed. You can easily + # confuse this method. Originally, the patterns were easier + # to construct and this method more robust, because this method + # generated search regexes on the fly; however, this was + # computationally expensive and slowed down the entire REXML package + # considerably, since this is by far the most commonly called method. + # @param pattern must be a Regexp, and must be in the form of + # /^\s*(#{your pattern, with no groups})(.*)/. The first group + # will be returned; the second group is used if the consume flag is + # set. + # @param consume if true, the pattern returned will be consumed, leaving + # everything after it in the Source. + # @return the pattern, if found, or nil if the Source is empty or the + # pattern is not found. + def scan pattern, consume=false + return nil if @buffer.nil? + rv = @buffer.scan(pattern) + @buffer = $' if consume and rv.size>0 + rv + end + + def read + end + + def match pattern, consume=false + md = pattern.match @buffer + @buffer = $' if consume and md + return md + end + + # @return true if the Source is exhausted + def empty? + @buffer.nil? or @buffer.strip.nil? + end + + # @return the current line in the source + def current_line + lines = @orig.split + res = lines.grep @buffer[0..30] + res = res[-1] if res.kind_of? Array + lines.index( res ) if res + end + end + + # A Source that wraps an IO. See the Source class for method + # documentation + class IOSource < Source + #attr_reader :block_size + + def initialize arg, block_size=500 + @er_source = @source = arg + @to_utf = false + # READLINE OPT + # The following was commented out when IOSource started using readline + # to pull the data from the stream. + #@block_size = block_size + #super @source.read(@block_size) + @line_break = '>' + super @source.readline( @line_break ) + end + + def scan pattern, consume=false + rv = super + # You'll notice that this next section is very similar to the same + # section in match(), but just a liiittle different. This is + # because it is a touch faster to do it this way with scan() + # than the way match() does it; enough faster to warrent duplicating + # some code + if rv.size == 0 + until @buffer =~ pattern or @source.nil? + begin + # READLINE OPT + #str = @source.read(@block_size) + str = @source.readline(@line_break) + str = decode(str) if @to_utf and str + @buffer << str + rescue + @source = nil + end + end + rv = super + end + rv.taint + rv + end + + def read + begin + str = @source.readline('>') + str = decode(str) if @to_utf and str + @buffer << str + rescue + @source = nil + end + end + + def match pattern, consume=false + rv = pattern.match(@buffer) + @buffer = $' if consume and rv + while !rv and @source + begin + str = @source.readline('>') + str = decode(str) if @to_utf and str + @buffer << str + rv = pattern.match(@buffer) + @buffer = $' if consume and rv + rescue + @source = nil + end + end + rv.taint + rv + end + + def empty? + super and ( @source.nil? || @source.eof? ) + end + + # @return the current line in the source + def current_line + pos = @er_source.pos # The byte position in the source + lineno = @er_source.lineno # The XML < position in the source + @er_source.rewind + line = 0 # The \r\n position in the source + begin + while @er_source.pos < pos + @er_source.readline + line += 1 + end + rescue + end + [pos, lineno, line] + end + end +end |