diff options
author | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-07-24 14:39:54 +0000 |
---|---|---|
committer | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-07-24 14:39:54 +0000 |
commit | 6d74a96b7de59131dac76fc8a0b05aa45114ed9f (patch) | |
tree | a4d363f2143c17d31818485bc16cce2febbdee66 | |
parent | 47584ef2c0f361da0a8d491f0c43d93f71c3932c (diff) | |
download | gcc-6d74a96b7de59131dac76fc8a0b05aa45114ed9f.tar.gz |
2013-07-24 Tim Shen <timshen91@gmail.com>
Reimplment matcher using Depth-first search(backtracking).
PR libstdc++/53622
PR libstdc++/57173
* include/bits/regex.h: regex_match() and regex_search().
* include/bits/regex_cursor.h: Fix _M_set_pos().
* include/bits/regex_grep_matcher.h: add _M_dfs_match().
* include/bits/regex_grep_matcher.tcc: Implement it.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
New.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
New.
* testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
Remove xfail.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@201213 138bc75d-0d04-0410-961f-82ee72b054a4
8 files changed, 193 insertions, 9 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index ab21abefed5..45dc9452afb 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,19 @@ +2013-07-24 Tim Shen <timshen91@gmail.com> + + Reimplment matcher using Depth-first search(backtracking). + PR libstdc++/53622 + PR libstdc++/57173 + * include/bits/regex.h: regex_match() and regex_search(). + * include/bits/regex_cursor.h: Fix _M_set_pos(). + * include/bits/regex_grep_matcher.h: add _M_dfs_match(). + * include/bits/regex_grep_matcher.tcc: Implement it. + * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc: + New. + * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc: + New. + * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc: + Remove xfail. + 2013-07-23 Tim Shen <timshen91@gmail.com> Implement regex_iterator and regex_token_iterator. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 9848f717fc6..64a1807033b 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -2185,8 +2185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags); - __matcher._M_match(); - return __m[0].matched; + return __matcher._M_dfs_match(); } /** @@ -2338,8 +2337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags); - __matcher._M_search_from_first(); - if (__m[0].matched) + if (__matcher._M_dfs_search_from_first()) { __r._M_set_range(__m.size(), __detail::_SpecializedCursor<_Bi_iter> diff --git a/libstdc++-v3/include/bits/regex_cursor.h b/libstdc++-v3/include/bits/regex_cursor.h index 44e9d1d9239..444d07ae263 100644 --- a/libstdc++-v3/include/bits/regex_cursor.h +++ b/libstdc++-v3/include/bits/regex_cursor.h @@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { virtual ~_PatternCursor() { }; virtual void _M_next() = 0; + virtual void _M_prev() = 0; virtual bool _M_at_end() const = 0; }; @@ -66,6 +67,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_next() { ++_M_c; } + void + _M_prev() + { --_M_c; } + _FwdIterT _M_pos() const { return _M_c; } diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h index da9264d5c17..c402c780f1c 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.h +++ b/libstdc++-v3/include/bits/regex_grep_matcher.h @@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__j == 0) _M_results.at(__i).first = __c._M_pos(); else - _M_results.at(__i).second = __c._M_pos()+1; + _M_results.at(__i).second = __c._M_pos(); } /// A stack of states used in evaluating the NFA. @@ -127,9 +127,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_pattern(__p), _M_results(__r) { } - void _M_match(); + void + _M_match(); + + void + _M_search_from_first(); - void _M_search_from_first(); + bool + _M_dfs_match() + { return _M_dfs<true>(_M_nfa->_M_start()); } + + bool + _M_dfs_search_from_first() + { return _M_dfs<false>(_M_nfa->_M_start()); } private: _StateSet @@ -141,6 +151,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSet _M_e_closure(_StateStack& __stack, const _StateSet& __s); + template<bool __match_mode> + bool + _M_dfs(_StateIdT __i); + const std::shared_ptr<_Nfa> _M_nfa; _PatternCursor& _M_pattern; _Results& _M_results; diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc index 2d4a9a60faa..46b98174888 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc +++ b/libstdc++-v3/include/bits/regex_grep_matcher.tcc @@ -103,6 +103,56 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION + // _M_dfs() take a state, along with current string cursor(_M_pattern), + // trying to match current state with current character. + // Only _S_opcode_match will consume a character. + // TODO: This is too slow. Try to compile the NFA to a DFA. + template<bool __match_mode> + bool _Grep_matcher:: + _M_dfs(_StateIdT __i) + { + if (__i == _S_invalid_state_id) + // This is not that certain. Need deeper investigate. + return false; + const auto& __state = (*_M_nfa)[__i]; + bool __ret = false; + switch (__state._M_opcode) + { + case _S_opcode_alternative: + // Greedy mode by default. For non-greedy mode, + // swap _M_alt and _M_next. + __ret = _M_dfs<__match_mode>(__state._M_alt) + || _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_begin: + __state._M_tagger(_M_pattern, _M_results); + __ret = _M_dfs<__match_mode>(__state._M_next); + break; + case _S_opcode_subexpr_end: + __state._M_tagger(_M_pattern, _M_results); + __ret = _M_dfs<__match_mode>(__state._M_next); + _M_results._M_set_matched(__state._M_subexpr, __ret); + break; + case _S_opcode_match: + if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern)) + { + _M_pattern._M_next(); + __ret = _M_dfs<__match_mode>(__state._M_next); + _M_pattern._M_prev(); + } + break; + case _S_opcode_accept: + if (__match_mode) + __ret = _M_pattern._M_at_end(); + else + __ret = true; + break; + default: + _GLIBCXX_DEBUG_ASSERT( false ); + } + return __ret; + } + inline void _Grep_matcher:: _M_match() { diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc new file mode 100644 index 00000000000..383ed054a90 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc @@ -0,0 +1,52 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-07-23 Tim Shen <timshen91@gmail.com> +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match +// Tests Extended grouping against a std::string target. + +#include <regex> +#include <testsuite_hooks.h> + +// libstdc++/53622 +void +test01() +{ + bool test __attribute__((unused)) = true; + + std::regex re("zxcv/(one.*)abc", std::regex::extended); + std::string target("zxcv/onetwoabc"); + std::smatch m; + + VERIFY( std::regex_search(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( m[0].matched == true ); + VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); + VERIFY( m[1].matched == true ); + VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc new file mode 100644 index 00000000000..3031c43d188 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc @@ -0,0 +1,50 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-07-23 Tim Shen <timshen91@gmail.com> +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.3 regex_search +// Tests Extended against a std::string target. + +#include <regex> +#include <testsuite_hooks.h> +#include <iostream> + +// libstdc++/57173 +void +test01() +{ + bool test __attribute__((unused)) = true; + + std::regex re("/asdf(/.*)", std::regex::extended); + std::string target("/asdf/qwerty"); + std::smatch m; + + VERIFY( std::regex_match(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc index f2a7f2104c1..ee487f19836 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc @@ -1,5 +1,4 @@ // { dg-options "-std=gnu++11" } -// { dg-do run { xfail *-*-* } } // // 2013-07-17 Tim Shen <timshen91@gmail.com> @@ -32,7 +31,7 @@ test01() { bool test __attribute__((unused)) = true; - std::regex re("as(df)", std::regex::basic); + std::regex re("as\\(df\\)", std::regex::basic); std::string target("xxasdfyy"); std::smatch m; |