diff options
Diffstat (limited to 'pcre/pcre_scanner.cc')
-rw-r--r-- | pcre/pcre_scanner.cc | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/pcre/pcre_scanner.cc b/pcre/pcre_scanner.cc new file mode 100644 index 00000000000..6be2be6829b --- /dev/null +++ b/pcre/pcre_scanner.cc @@ -0,0 +1,199 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Sanjay Ghemawat + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <vector> +#include <assert.h> + +#include "pcrecpp_internal.h" +#include "pcre_scanner.h" + +using std::vector; + +namespace pcrecpp { + +Scanner::Scanner() + : data_(), + input_(data_), + skip_(NULL), + should_skip_(false), + skip_repeat_(false), + save_comments_(false), + comments_(NULL), + comments_offset_(0) { +} + +Scanner::Scanner(const string& in) + : data_(in), + input_(data_), + skip_(NULL), + should_skip_(false), + skip_repeat_(false), + save_comments_(false), + comments_(NULL), + comments_offset_(0) { +} + +Scanner::~Scanner() { + delete skip_; + delete comments_; +} + +void Scanner::SetSkipExpression(const char* re) { + delete skip_; + if (re != NULL) { + skip_ = new RE(re); + should_skip_ = true; + skip_repeat_ = true; + ConsumeSkip(); + } else { + skip_ = NULL; + should_skip_ = false; + skip_repeat_ = false; + } +} + +void Scanner::Skip(const char* re) { + delete skip_; + if (re != NULL) { + skip_ = new RE(re); + should_skip_ = true; + skip_repeat_ = false; + ConsumeSkip(); + } else { + skip_ = NULL; + should_skip_ = false; + skip_repeat_ = false; + } +} + +void Scanner::DisableSkip() { + assert(skip_ != NULL); + should_skip_ = false; +} + +void Scanner::EnableSkip() { + assert(skip_ != NULL); + should_skip_ = true; + ConsumeSkip(); +} + +int Scanner::LineNumber() const { + // TODO: Make it more efficient by keeping track of the last point + // where we computed line numbers and counting newlines since then. + // We could use std:count, but not all systems have it. :-( + int count = 1; + for (const char* p = data_.data(); p < input_.data(); ++p) + if (*p == '\n') + ++count; + return count; +} + +int Scanner::Offset() const { + return (int)(input_.data() - data_.c_str()); +} + +bool Scanner::LookingAt(const RE& re) const { + int consumed; + return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0); +} + + +bool Scanner::Consume(const RE& re, + const Arg& arg0, + const Arg& arg1, + const Arg& arg2) { + const bool result = re.Consume(&input_, arg0, arg1, arg2); + if (result && should_skip_) ConsumeSkip(); + return result; +} + +// helper function to consume *skip_ and honour save_comments_ +void Scanner::ConsumeSkip() { + const char* start_data = input_.data(); + while (skip_->Consume(&input_)) { + if (!skip_repeat_) { + // Only one skip allowed. + break; + } + } + if (save_comments_) { + if (comments_ == NULL) { + comments_ = new vector<StringPiece>; + } + // already pointing one past end, so no need to +1 + int length = (int)(input_.data() - start_data); + if (length > 0) { + comments_->push_back(StringPiece(start_data, length)); + } + } +} + + +void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) { + // short circuit out if we've not yet initialized comments_ + // (e.g., when save_comments is false) + if (!comments_) { + return; + } + // TODO: if we guarantee that comments_ will contain StringPieces + // that are ordered by their start, then we can do a binary search + // for the first StringPiece at or past start and then scan for the + // ones contained in the range, quit early (use equal_range or + // lower_bound) + for (vector<StringPiece>::const_iterator it = comments_->begin(); + it != comments_->end(); ++it) { + if ((it->data() >= data_.c_str() + start && + it->data() + it->size() <= data_.c_str() + end)) { + ranges->push_back(*it); + } + } +} + + +void Scanner::GetNextComments(vector<StringPiece> *ranges) { + // short circuit out if we've not yet initialized comments_ + // (e.g., when save_comments is false) + if (!comments_) { + return; + } + for (vector<StringPiece>::const_iterator it = + comments_->begin() + comments_offset_; + it != comments_->end(); ++it) { + ranges->push_back(*it); + ++comments_offset_; + } +} + +} // namespace pcrecpp |