diff options
Diffstat (limited to 'src/third_party/boost-1.69.0/boost/algorithm/searching/knuth_morris_pratt.hpp')
-rw-r--r-- | src/third_party/boost-1.69.0/boost/algorithm/searching/knuth_morris_pratt.hpp | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/src/third_party/boost-1.69.0/boost/algorithm/searching/knuth_morris_pratt.hpp b/src/third_party/boost-1.69.0/boost/algorithm/searching/knuth_morris_pratt.hpp new file mode 100644 index 00000000000..5b5b64a729a --- /dev/null +++ b/src/third_party/boost-1.69.0/boost/algorithm/searching/knuth_morris_pratt.hpp @@ -0,0 +1,263 @@ +/* + Copyright (c) Marshall Clow 2010-2012. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org +*/ + +#ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP +#define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP + +#include <vector> +#include <iterator> // for std::iterator_traits + +#include <boost/assert.hpp> +#include <boost/static_assert.hpp> + +#include <boost/range/begin.hpp> +#include <boost/range/end.hpp> + +#include <boost/utility/enable_if.hpp> +#include <boost/type_traits/is_same.hpp> + +#include <boost/algorithm/searching/detail/debugging.hpp> + +// #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG + +namespace boost { namespace algorithm { + +// #define NEW_KMP + +/* + A templated version of the Knuth-Morris-Pratt searching algorithm. + + Requirements: + * Random-access iterators + * The two iterator types (I1 and I2) must "point to" the same underlying type. + + http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm +*/ + + template <typename patIter> + class knuth_morris_pratt { + typedef typename std::iterator_traits<patIter>::difference_type difference_type; + public: + knuth_morris_pratt ( patIter first, patIter last ) + : pat_first ( first ), pat_last ( last ), + k_pattern_length ( std::distance ( pat_first, pat_last )), + skip_ ( k_pattern_length + 1 ) { +#ifdef NEW_KMP + preKmp ( pat_first, pat_last ); +#else + init_skip_table ( pat_first, pat_last ); +#endif +#ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG + detail::PrintTable ( skip_.begin (), skip_.end ()); +#endif + } + + ~knuth_morris_pratt () {} + + /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p ) + /// \brief Searches the corpus for the pattern that was passed into the constructor + /// + /// \param corpus_first The start of the data to search (Random Access Iterator) + /// \param corpus_last One past the end of the data to search + /// \param p A predicate used for the search comparisons. + /// + template <typename corpusIter> + std::pair<corpusIter, corpusIter> + operator () ( corpusIter corpus_first, corpusIter corpus_last ) const { + BOOST_STATIC_ASSERT (( boost::is_same< + typename std::iterator_traits<patIter>::value_type, + typename std::iterator_traits<corpusIter>::value_type>::value )); + + if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it! + if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start + + const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last ); + // If the pattern is larger than the corpus, we can't find it! + if ( k_corpus_length < k_pattern_length ) + return std::make_pair(corpus_last, corpus_last); + + return do_search ( corpus_first, corpus_last, k_corpus_length ); + } + + template <typename Range> + std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type> + operator () ( Range &r ) const { + return (*this) (boost::begin(r), boost::end(r)); + } + + private: +/// \cond DOXYGEN_HIDE + patIter pat_first, pat_last; + const difference_type k_pattern_length; + std::vector <difference_type> skip_; + + /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p ) + /// \brief Searches the corpus for the pattern that was passed into the constructor + /// + /// \param corpus_first The start of the data to search (Random Access Iterator) + /// \param corpus_last One past the end of the data to search + /// \param p A predicate used for the search comparisons. + /// + template <typename corpusIter> + std::pair<corpusIter, corpusIter> + do_search ( corpusIter corpus_first, corpusIter corpus_last, + difference_type k_corpus_length ) const { + difference_type match_start = 0; // position in the corpus that we're matching + +#ifdef NEW_KMP + int patternIdx = 0; + while ( match_start < k_corpus_length ) { + while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] ) + patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch + + patternIdx++; + match_start++; //<--- corpus is always increased by 1 + + if ( patternIdx >= (int) k_pattern_length ) + return corpus_first + match_start - patternIdx; + } + +#else +// At this point, we know: +// k_pattern_length <= k_corpus_length +// for all elements of skip, it holds -1 .. k_pattern_length +// +// In the loop, we have the following invariants +// idx is in the range 0 .. k_pattern_length +// match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1 + + const difference_type last_match = k_corpus_length - k_pattern_length; + difference_type idx = 0; // position in the pattern we're comparing + + while ( match_start <= last_match ) { + while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) { + if ( ++idx == k_pattern_length ) + return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length); + } + // Figure out where to start searching again + // assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward + match_start += idx - skip_ [ idx ]; + idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0; + // assert ( idx >= 0 && idx < k_pattern_length ); + } +#endif + + // We didn't find anything + return std::make_pair(corpus_last, corpus_last); + } + + + void preKmp ( patIter first, patIter last ) { + const difference_type count = std::distance ( first, last ); + + difference_type i, j; + + i = 0; + j = skip_[0] = -1; + while (i < count) { + while (j > -1 && first[i] != first[j]) + j = skip_[j]; + i++; + j++; + if (first[i] == first[j]) + skip_[i] = skip_[j]; + else + skip_[i] = j; + } + } + + + void init_skip_table ( patIter first, patIter last ) { + const difference_type count = std::distance ( first, last ); + + difference_type j; + skip_ [ 0 ] = -1; + for ( int i = 1; i <= count; ++i ) { + j = skip_ [ i - 1 ]; + while ( j >= 0 ) { + if ( first [ j ] == first [ i - 1 ] ) + break; + j = skip_ [ j ]; + } + skip_ [ i ] = j + 1; + } + } +// \endcond + }; + + +/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters + Use a bit of TMP to disambiguate the 3-argument templates */ + +/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last, +/// patIter pat_first, patIter pat_last ) +/// \brief Searches the corpus for the pattern. +/// +/// \param corpus_first The start of the data to search (Random Access Iterator) +/// \param corpus_last One past the end of the data to search +/// \param pat_first The start of the pattern to search for (Random Access Iterator) +/// \param pat_last One past the end of the data to search for +/// + template <typename patIter, typename corpusIter> + std::pair<corpusIter, corpusIter> knuth_morris_pratt_search ( + corpusIter corpus_first, corpusIter corpus_last, + patIter pat_first, patIter pat_last ) + { + knuth_morris_pratt<patIter> kmp ( pat_first, pat_last ); + return kmp ( corpus_first, corpus_last ); + } + + template <typename PatternRange, typename corpusIter> + std::pair<corpusIter, corpusIter> knuth_morris_pratt_search ( + corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern ) + { + typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator; + knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern)); + return kmp ( corpus_first, corpus_last ); + } + + template <typename patIter, typename CorpusRange> + typename boost::disable_if_c< + boost::is_same<CorpusRange, patIter>::value, + std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> > + ::type + knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last ) + { + knuth_morris_pratt<patIter> kmp ( pat_first, pat_last ); + return kmp (boost::begin (corpus), boost::end (corpus)); + } + + template <typename PatternRange, typename CorpusRange> + std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> + knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern ) + { + typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator; + knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern)); + return kmp (boost::begin (corpus), boost::end (corpus)); + } + + + // Creator functions -- take a pattern range, return an object + template <typename Range> + boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type> + make_knuth_morris_pratt ( const Range &r ) { + return boost::algorithm::knuth_morris_pratt + <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r)); + } + + template <typename Range> + boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type> + make_knuth_morris_pratt ( Range &r ) { + return boost::algorithm::knuth_morris_pratt + <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r)); + } +}} + +#endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP |