summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Shen <timshen91@gmail.com>2013-07-30 12:02:55 +0000
committerTim Shen <timshen@gcc.gnu.org>2013-07-30 12:02:55 +0000
commita6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b (patch)
tree3806c0e659bf08c57518f2bf0aca08fb7785d419
parent605e86fa3f9faa7f244ead20033aa1263d635c21 (diff)
downloadgcc-a6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b.tar.gz
Thompson matcher refactored.
2013-07-30 Tim Shen <timshen91@gmail.com> Thompson matcher refactored. Fix grouping problem. * include/bits/regex.h: Use a dispatcher _M_get_matcher(). * include/bits/regex_compiler.h: Tweak for auto switching. * include/bits/regex_grep_matcher.h: Class structure. * include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson matcher) refactoring. * include/bits/regex_nfa.h: Change _Results's interfaces. * include/std/regex: Includes <map> and <queue>. * testsuite/28_regex/algorithms/regex_match/extended/53622.cc: For both matchers. * testsuite/28_regex/algorithms/regex_match/extended/57173.cc: For both matchers. * testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc: New. From-SVN: r201334
-rw-r--r--libstdc++-v3/ChangeLog17
-rw-r--r--libstdc++-v3/include/bits/regex.h9
-rw-r--r--libstdc++-v3/include/bits/regex_compiler.h3
-rw-r--r--libstdc++-v3/include/bits/regex_grep_matcher.h151
-rw-r--r--libstdc++-v3/include/bits/regex_grep_matcher.tcc258
-rw-r--r--libstdc++-v3/include/bits/regex_nfa.h50
-rw-r--r--libstdc++-v3/include/std/regex2
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc35
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc23
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc71
10 files changed, 419 insertions, 200 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index edb17eae0c0..db9a0358afc 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,20 @@
+2013-07-30 Tim Shen <timshen91@gmail.com>
+
+ Thompson matcher refactored. Fix grouping problem.
+ * include/bits/regex.h: Use a dispatcher _M_get_matcher().
+ * include/bits/regex_compiler.h: Tweak for auto switching.
+ * include/bits/regex_grep_matcher.h: Class structure.
+ * include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson
+ matcher) refactoring.
+ * include/bits/regex_nfa.h: Change _Results's interfaces.
+ * include/std/regex: Includes <map> and <queue>.
+ * testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
+ For both matchers.
+ * testsuite/28_regex/algorithms/regex_match/extended/57173.cc:
+ For both matchers.
+ * testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc:
+ New.
+
2013-07-29 Nathan Froyd <froydnj@gcc.gnu.org>
* include/std/atomic (compare_exchange_weak, compare_exchange_strong):
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 56928484785..c1f65339848 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -2175,7 +2175,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
regex_match(_Bi_iter __s,
_Bi_iter __e,
- match_results<_Bi_iter, _Alloc>& __m,
+ match_results<_Bi_iter, _Alloc>& __m,
const basic_regex<_Ch_type, _Rx_traits>& __re,
regex_constants::match_flag_type __flags
= regex_constants::match_default)
@@ -2184,8 +2184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
- __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
- return __matcher._M_dfs_match();
+ return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match();
}
/**
@@ -2336,8 +2335,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
{
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
- __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
- if (__matcher._M_dfs_search_from_first())
+ auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags);
+ if (__matcher->_M_search_from_first())
{
__r._M_set_range(__m.size(),
__detail::_SpecializedCursor<_Bi_iter>
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index dae0948404c..badea8c641e 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -936,7 +936,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_match_token(_ScannerT::_S_token_backref))
{
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
- return true;
+ _M_state_store._M_set_back_ref(true);
+ //return true;
}
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h
index 8686cc933be..d9270e5dc94 100644
--- a/libstdc++-v3/include/bits/regex_grep_matcher.h
+++ b/libstdc++-v3/include/bits/regex_grep_matcher.h
@@ -60,6 +60,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m);
+ ~_SpecializedResults()
+ {
+ if (_M_managed)
+ delete &_M_results;
+ }
+
+ private:
+ _SpecializedResults(const _SpecializedResults& __rhs)
+ : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
+ _M_managed(true)
+ { }
+
+ public:
void
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
@@ -76,8 +89,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_set_matched(int __i, bool __is_matched)
{ _M_results.at(__i).matched = __is_matched; }
+ std::unique_ptr<_Results>
+ _M_clone() const
+ { return unique_ptr<_Results>(new _SpecializedResults(*this)); }
+
+ void
+ _M_assign(const _Results& __rhs)
+ {
+ auto __r = static_cast<const _SpecializedResults*>(&__rhs);
+ _M_results = __r->_M_results;
+ }
+
private:
match_results<_FwdIterT, _Alloc>& _M_results;
+ bool _M_managed;
};
template<typename _FwdIterT, typename _Alloc>
@@ -85,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_SpecializedResults(const _Automaton::_SizeT __size,
const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m)
- : _M_results(__m)
+ : _M_results(__m), _M_managed(false)
{
_M_results.clear();
_M_results.reserve(__size + 2);
@@ -105,14 +130,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
if (__j == 0)
- _M_results.at(__i).first = __c._M_pos();
+ _M_results.at(__i).first = __c._M_pos();
else
_M_results.at(__i).second = __c._M_pos();
}
- /// A stack of states used in evaluating the NFA.
- typedef std::stack<_StateIdT, std::vector<_StateIdT> > _StateStack;
-
/// Executes a regular expression NFA/DFA over a range using a
/// variant of the parallel execution algorithm featured in the grep
/// utility, modified to use Laurikari tags.
@@ -124,47 +146,126 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags)
: _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
- _M_pattern(__p), _M_results(__r)
+ _M_str_cur(__p), _M_results(__r)
+ { }
+
+ virtual
+ ~_Grep_matcher()
{ }
// Set matched when string exactly match the pattern.
- void
- _M_match();
+ virtual bool
+ _M_match() = 0;
// Set matched when some prefix of the string matches the pattern.
- void
- _M_search_from_first();
+ virtual bool
+ _M_search_from_first() = 0;
+
+ protected:
+ const std::shared_ptr<_Nfa> _M_nfa;
+ _PatternCursor& _M_str_cur;
+ _Results& _M_results;
+ };
+
+ // Time complexity: exponential
+ // Space complexity: O(_M_str_cur.size())
+ // _M_dfs() take a state, along with current string cursor(_M_str_cur),
+ // trying to match current state with current character.
+ // Only _S_opcode_match will consume a character.
+ class _DFSMatcher
+ : public _Grep_matcher
+ {
+ public:
+ _DFSMatcher(_PatternCursor& __p,
+ _Results& __r,
+ const _AutomatonPtr& __automaton,
+ regex_constants::match_flag_type __flags)
+ : _Grep_matcher(__p, __r, __automaton, __flags)
+ { }
- // TODO: in the future this function will be _M_match, in another class.
bool
- _M_dfs_match()
+ _M_match()
{ return _M_dfs<true>(_M_nfa->_M_start()); }
- // TODO: in the future this function will be _M_search_from_first,
- // in another class.
bool
- _M_dfs_search_from_first()
+ _M_search_from_first()
{ return _M_dfs<false>(_M_nfa->_M_start()); }
private:
- _StateSet
- _M_e_closure(_StateIdT __i);
+ template<bool __match_mode>
+ bool
+ _M_dfs(_StateIdT __i);
+ };
+
+ // It's essentially a variant of Single-Source-Shortest-Path problem, where,
+ // the matching results is the final distance and should be minimized.
+ // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed
+ // (BFS-like) Bellman-Ford algorithm,
+ // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
+ //
+ // Every entry of _M_current saves the solution(grouping status) for every
+ // matching head. When states transfer, solutions will be compared and
+ // deduplicated(based on which greedy mode we have).
+ //
+ // Time complexity: O(_M_str_cur.size() * _M_nfa.size())
+ // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
+ class _BFSMatcher
+ : public _Grep_matcher
+ {
+ public:
+ _BFSMatcher(_PatternCursor& __p,
+ _Results& __r,
+ const _AutomatonPtr& __automaton,
+ regex_constants::match_flag_type __flags)
+ : _Grep_matcher(__p, __r, __automaton, __flags)
+ {
+ if (_M_nfa->_M_start() != _S_invalid_state_id)
+ _M_current[_M_nfa->_M_start()] = _M_results._M_clone();
+ _M_e_closure();
+ }
- _StateSet
- _M_e_closure(const _StateSet& __s);
+ bool
+ _M_match()
+ { return _M_main_loop<true>(); }
- _StateSet
- _M_e_closure(_StateStack& __stack, const _StateSet& __s);
+ bool
+ _M_search_from_first()
+ { return _M_main_loop<false>(); }
+ private:
template<bool __match_mode>
bool
- _M_dfs(_StateIdT __i);
+ _M_main_loop();
- const std::shared_ptr<_Nfa> _M_nfa;
- _PatternCursor& _M_pattern;
- _Results& _M_results;
+ void
+ _M_e_closure();
+
+ void
+ _M_move();
+
+ bool
+ _M_match_less_than(_StateIdT __u, _StateIdT __v) const;
+
+ bool
+ _M_includes_some() const;
+
+ std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
};
+ std::unique_ptr<_Grep_matcher> _Nfa::
+ _M_get_matcher(_PatternCursor& __p,
+ _Results& __r,
+ const _AutomatonPtr& __a,
+ regex_constants::match_flag_type __flags)
+ {
+ if (_M_has_back_ref)
+ return unique_ptr<_Grep_matcher>(
+ new _DFSMatcher(__p, __r, __a, __flags));
+ else
+ return unique_ptr<_Grep_matcher>(
+ new _BFSMatcher(__p, __r, __a, __flags));
+ }
+
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc
index dccdfda0bc1..2cfc790b6e6 100644
--- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc
+++ b/libstdc++-v3/include/bits/regex_grep_matcher.tcc
@@ -32,83 +32,13 @@
namespace std _GLIBCXX_VISIBILITY(default)
{
-namespace
-{
- // A stack of states used in evaluating the NFA.
- typedef std::stack<std::__detail::_StateIdT,
- std::vector<std::__detail::_StateIdT>
- > _StateStack;
-
- // Obtains the next state set given the current state set __s and the current
- // input character.
- inline std::__detail::_StateSet
- __move(const std::__detail::_PatternCursor& __p,
- const std::__detail::_Nfa& __nfa,
- const std::__detail::_StateSet& __s)
- {
- std::__detail::_StateSet __m;
- for (std::__detail::_StateSet::const_iterator __i = __s.begin();
- __i != __s.end(); ++__i)
- {
- if (*__i == std::__detail::_S_invalid_state_id)
- continue;
-
- const std::__detail::_State& __state = __nfa[*__i];
- if (__state._M_opcode == std::__detail::_S_opcode_match
- && __state._M_matches(__p))
- __m.insert(__state._M_next);
- }
- return __m;
- }
-
- // returns true if (__s intersect __t) is not empty
- inline bool
- __includes_some(const std::__detail::_StateSet& __s,
- const std::__detail::_StateSet& __t)
- {
- if (__s.size() > 0 && __t.size() > 0)
- {
- std::__detail::_StateSet::const_iterator __first = __s.begin();
- std::__detail::_StateSet::const_iterator __second = __t.begin();
- while (__first != __s.end() && __second != __t.end())
- {
- if (*__first < *__second)
- ++__first;
- else if (*__second < *__first)
- ++__second;
- else
- return true;
- }
- }
- return false;
- }
-
- // If an identified state __u is not already in the current state set __e,
- // insert it and push it on the current state stack __s.
- inline void
- __add_visited_state(const std::__detail::_StateIdT __u,
- _StateStack& __s,
- std::__detail::_StateSet& __e)
- {
- if (__e.count(__u) == 0)
- {
- __e.insert(__u);
- __s.push(__u);
- }
- }
-
-} // anonymous namespace
-
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
- // _M_dfs() take a state, along with current string cursor(_M_pattern),
- // trying to match current state with current character.
- // Only _S_opcode_match will consume a character.
// TODO: This is too slow. Try to compile the NFA to a DFA.
template<bool __match_mode>
- bool _Grep_matcher::
+ bool _DFSMatcher::
_M_dfs(_StateIdT __i)
{
if (__i == _S_invalid_state_id)
@@ -126,116 +56,162 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_subexpr_begin:
- __state._M_tagger(_M_pattern, _M_results);
+ __state._M_tagger(_M_str_cur, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_subexpr_end:
- __state._M_tagger(_M_pattern, _M_results);
+ __state._M_tagger(_M_str_cur, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next);
_M_results._M_set_matched(__state._M_subexpr, __ret);
break;
case _S_opcode_match:
- if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
+ if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
{
- _M_pattern._M_next();
+ _M_str_cur._M_next();
__ret = _M_dfs<__match_mode>(__state._M_next);
- _M_pattern._M_prev();
+ _M_str_cur._M_prev();
}
break;
case _S_opcode_accept:
if (__match_mode)
- __ret = _M_pattern._M_at_end();
+ __ret = _M_str_cur._M_at_end();
else
__ret = true;
break;
default:
- _GLIBCXX_DEBUG_ASSERT( false );
+ _GLIBCXX_DEBUG_ASSERT(false);
}
return __ret;
}
- inline void _Grep_matcher::
- _M_match()
- {
- __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
- for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
- __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
-
- _M_results._M_set_matched(0,
- __includes_some(_M_nfa->_M_final_states(), __t));
- }
+ template<bool __match_mode>
+ bool _BFSMatcher::
+ _M_main_loop()
+ {
+ while (!_M_str_cur._M_at_end())
+ {
+ if (!__match_mode)
+ if (_M_includes_some())
+ return true;
+ _M_move();
+ _M_str_cur._M_next();
+ _M_e_closure();
+ }
+ return _M_includes_some();
+ }
- inline void _Grep_matcher::
- _M_search_from_first()
+ // The SPFA approach.
+ void _BFSMatcher::
+ _M_e_closure()
{
- __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start());
- for (; !_M_pattern._M_at_end(); _M_pattern._M_next())
+ std::queue<_StateIdT> __q;
+ std::vector<bool> __in_q(_M_nfa->size(), false);
+ for (auto& __it : _M_current)
{
- if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS
- {
- _M_results._M_set_matched(0, true);
+ __in_q[__it.first] = true;
+ __q.push(__it.first);
+ }
+ while (!__q.empty())
+ {
+ auto __u = __q.front();
+ __q.pop();
+ __in_q[__u] = false;
+ const auto& __state = (*_M_nfa)[__u];
+
+ // Can be implemented using method, but there're too much arguments.
+ auto __add_visited_state = [&](_StateIdT __v)
+ {
+ if (__v == _S_invalid_state_id)
return;
+ if (_M_match_less_than(__u, __v))
+ {
+ _M_current[__v] = _M_current[__u]->_M_clone();
+ // if a state is updated, it's outgoing neighbors should be
+ // reconsidered too. Push them to the queue.
+ if (!__in_q[__v])
+ {
+ __in_q[__v] = true;
+ __q.push(__v);
+ }
+ }
+ };
+
+ switch (__state._M_opcode)
+ {
+ case _S_opcode_alternative:
+ __add_visited_state(__state._M_next);
+ __add_visited_state(__state._M_alt);
+ break;
+ case _S_opcode_subexpr_begin:
+ __state._M_tagger(_M_str_cur, *_M_current[__u]);
+ __add_visited_state(__state._M_next);
+ break;
+ case _S_opcode_subexpr_end:
+ __state._M_tagger(_M_str_cur, *_M_current[__u]);
+ _M_current[__u]->_M_set_matched(__state._M_subexpr, true);
+ __add_visited_state(__state._M_next);
+ break;
+ case _S_opcode_match:
+ break;
+ case _S_opcode_accept:
+ __add_visited_state(__state._M_next);
+ break;
+ default:
+ _GLIBCXX_DEBUG_ASSERT(false);
}
- __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t));
}
- _M_results._M_set_matched(0, false);
}
- // Creates the e-closure set for the initial state __i.
- inline _StateSet _Grep_matcher::
- _M_e_closure(_StateIdT __i)
+ void _BFSMatcher::
+ _M_move()
{
- _StateSet __s;
- __s.insert(__i);
- _StateStack __stack;
- __stack.push(__i);
- return this->_M_e_closure(__stack, __s);
+ decltype(_M_current) __next;
+ for (auto& __it : _M_current)
+ {
+ const auto& __state = (*_M_nfa)[__it.first];
+ if (__state._M_opcode == _S_opcode_match
+ && __state._M_matches(_M_str_cur))
+ if (_M_match_less_than(__it.first, __state._M_next)
+ && __state._M_next != _S_invalid_state_id)
+ __next[__state._M_next] = __it.second->_M_clone();
+ }
+ _M_current = move(__next);
}
- // Creates the e-closure set for an arbitrary state set __s.
- inline _StateSet _Grep_matcher::
- _M_e_closure(const _StateSet& __s)
+ bool _BFSMatcher::
+ _M_match_less_than(_StateIdT __u, _StateIdT __v) const
{
- _StateStack __stack;
- for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i)
- __stack.push(*__i);
- return this->_M_e_closure(__stack, __s);
+ if (_M_current.count(__u) == 0)
+ return false;
+ if (_M_current.count(__v) > 0)
+ return true;
+ // TODO: Greedy and Non-greedy support
+ return true;
}
- inline _StateSet _Grep_matcher::
- _M_e_closure(_StateStack& __stack, const _StateSet& __s)
+ bool _BFSMatcher::
+ _M_includes_some() const
{
- _StateSet __e = __s;
- while (!__stack.empty())
+ auto& __s = _M_nfa->_M_final_states();
+ auto& __t = _M_current;
+ if (__s.size() > 0 && __t.size() > 0)
{
- _StateIdT __t = __stack.top(); __stack.pop();
- if (__t == _S_invalid_state_id)
- continue;
- // for each __u with edge from __t to __u labeled e do ...
- const _State& __state = _M_nfa->operator[](__t);
- switch (__state._M_opcode)
+ auto __first = __s.begin();
+ auto __second = __t.begin();
+ while (__first != __s.end() && __second != __t.end())
{
- case _S_opcode_alternative:
- __add_visited_state(__state._M_next, __stack, __e);
- __add_visited_state(__state._M_alt, __stack, __e);
- break;
- case _S_opcode_subexpr_begin:
- __add_visited_state(__state._M_next, __stack, __e);
- __state._M_tagger(_M_pattern, _M_results);
- break;
- case _S_opcode_subexpr_end:
- __add_visited_state(__state._M_next, __stack, __e);
- __state._M_tagger(_M_pattern, _M_results);
- _M_results._M_set_matched(__state._M_subexpr, true);
- break;
- case _S_opcode_accept:
- __add_visited_state(__state._M_next, __stack, __e);
- break;
- default:
- break;
+ if (*__first < __second->first)
+ ++__first;
+ else if (__second->first < *__first)
+ ++__second;
+ else
+ {
+ _M_results._M_assign(*__second->second);
+ return true;
+ }
}
}
- return __e;
+ return false;
}
_GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/bits/regex_nfa.h b/libstdc++-v3/include/bits/regex_nfa.h
index fc30237436a..1bae1b78ed4 100644
--- a/libstdc++-v3/include/bits/regex_nfa.h
+++ b/libstdc++-v3/include/bits/regex_nfa.h
@@ -39,6 +39,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
+ /// Provides a generic facade for a templated match_results.
+ struct _Results
+ {
+ virtual
+ ~_Results()
+ { }
+ virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
+ virtual void _M_set_matched(int __i, bool __is_matched) = 0;
+ virtual std::unique_ptr<_Results> _M_clone() const = 0;
+ virtual void _M_assign(const _Results& __rhs) = 0;
+ };
+
+ class _Grep_matcher;
+ class _Automaton;
+
+ /// Generic shared pointer to an automaton.
+ typedef std::shared_ptr<_Automaton> _AutomatonPtr;
+
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
class _Automaton
{
@@ -52,15 +70,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
virtual _SizeT
_M_sub_count() const = 0;
+ virtual std::unique_ptr<_Grep_matcher>
+ _M_get_matcher(_PatternCursor& __p,
+ _Results& __r,
+ const _AutomatonPtr& __automaton,
+ regex_constants::match_flag_type __flags) = 0;
+
#ifdef _GLIBCXX_DEBUG
virtual std::ostream&
_M_dot(std::ostream& __ostr) const = 0;
#endif
};
- /// Generic shared pointer to an automaton.
- typedef std::shared_ptr<_Automaton> _AutomatonPtr;
-
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.
enum _Opcode
@@ -73,13 +94,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_opcode_accept = 255
};
- /// Provides a generic facade for a templated match_results.
- struct _Results
- {
- virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
- virtual void _M_set_matched(int __i, bool __is_matched) = 0;
- };
-
/// Tags current state (for subexpr begin/end).
typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
@@ -113,7 +127,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ __r._M_set_pos(_M_index, 1, __pc); }
int _M_index;
- _FwdIterT _M_pos;
};
/// Indicates if current state matches cursor current.
@@ -275,7 +288,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef regex_constants::syntax_option_type _FlagT;
_Nfa(_FlagT __f)
- : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
+ : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
+ // TODO: BFS by default. Your choice. Need to be set by the compiler.
+ _M_has_back_ref(false)
{ }
~_Nfa()
@@ -334,6 +349,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return this->size()-1;
}
+ void
+ _M_set_back_ref(bool __b)
+ { _M_has_back_ref = __b; }
+
+ std::unique_ptr<_Grep_matcher>
+ _M_get_matcher(_PatternCursor& __p,
+ _Results& __r,
+ const _AutomatonPtr& __automaton,
+ regex_constants::match_flag_type __flags);
+
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_dot(std::ostream& __ostr) const;
@@ -344,6 +369,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT _M_start_state;
_StateSet _M_accepting_states;
_SizeT _M_subexpr_count;
+ bool _M_has_back_ref;
};
/// Describes a sequence of one or more %_State, its current start
diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex
index 907f5bb65d8..b0918ed4afe 100644
--- a/libstdc++-v3/include/std/regex
+++ b/libstdc++-v3/include/std/regex
@@ -44,6 +44,8 @@
#include <iterator>
#include <locale>
#include <memory>
+#include <map>
+#include <queue>
#include <set>
#include <sstream>
#include <stack>
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
index 383ed054a90..10e2ff43765 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
@@ -32,16 +32,31 @@ test01()
{
bool test __attribute__((unused)) = true;
- std::regex re("zxcv/(one.*)abc", std::regex::extended);
- std::string target("zxcv/onetwoabc");
- std::smatch m;
-
- VERIFY( std::regex_search(target, m, re) );
- VERIFY( m.size() == 2 );
- VERIFY( m[0].matched == true );
- VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
- VERIFY( m[1].matched == true );
- VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+ {
+ std::regex re("zxcv/(one.*)abc", std::regex::extended);
+ std::string target("zxcv/onetwoabc");
+ std::smatch m;
+
+ VERIFY( std::regex_search(target, m, re) );
+ VERIFY( m.size() == 2 );
+ VERIFY( m[0].matched == true );
+ VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+ VERIFY( m[1].matched == true );
+ VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+ }
+
+ {
+ std::regex re("zxcv/(one.*)abc()\\2", std::regex::extended);
+ std::string target("zxcv/onetwoabc");
+ std::smatch m;
+
+ VERIFY( std::regex_search(target, m, re) );
+ VERIFY( m.size() == 3 );
+ VERIFY( m[0].matched == true );
+ VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+ VERIFY( m[1].matched == true );
+ VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+ }
}
int
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
index 3031c43d188..cb3a54f4d88 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
@@ -33,13 +33,24 @@ test01()
{
bool test __attribute__((unused)) = true;
- std::regex re("/asdf(/.*)", std::regex::extended);
- std::string target("/asdf/qwerty");
- std::smatch m;
+ {
+ std::regex re("/asdf(/.*)", std::regex::extended);
+ std::string target("/asdf/qwerty");
+ std::smatch m;
- VERIFY( std::regex_match(target, m, re) );
- VERIFY( m.size() == 2 );
- VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+ VERIFY( std::regex_match(target, m, re) );
+ VERIFY( m.size() == 2 );
+ VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+ }
+ {
+ std::regex re("/asdf(/.*)()\\2", std::regex::extended);
+ std::string target("/asdf/qwerty");
+ std::smatch m;
+
+ VERIFY( std::regex_match(target, m, re) );
+ VERIFY( m.size() == 3 );
+ VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+ }
}
int
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc
new file mode 100644
index 00000000000..86fab85a434
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc
@@ -0,0 +1,71 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-29 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended automatic matcher dispatching against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+template<typename _Bi_iter, typename _Alloc,
+ typename _Ch_type, typename _Rx_traits>
+ void
+ fake_match(_Bi_iter __s,
+ _Bi_iter __e,
+ match_results<_Bi_iter, _Alloc>& __m,
+ const basic_regex<_Ch_type, _Rx_traits>& __re,
+ regex_constants::match_flag_type __flags
+ = regex_constants::match_default)
+ {
+ __detail::_AutomatonPtr __a = __re._M_get_automaton();
+ __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
+ __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
+ __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
+ VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
+ &*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
+ }
+
+void
+test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ regex re("()(one(.*))abc\\1"); // backref cause DFS
+ const string target("onetwoabc");
+ smatch m;
+ fake_match(target.begin(), target.end(), m, re);
+
+ regex_match(target, m, re);
+ VERIFY( m[2].matched );
+ VERIFY( m[3].matched );
+ VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
+ VERIFY( std::string(m[3].first, m[3].second) == "two" );
+}
+
+int
+main()
+{
+ test01();
+ return 0;
+}