summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-10-20 10:07:29 +0000
committertimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-10-20 10:07:29 +0000
commit53c19c986606bf484095e57daddcf2e33da79cad (patch)
tree7769a5568df6939da675c46462f0680614d44d01
parentfe019f032fe177df5b9ba3c31297a48ba6382cff (diff)
downloadgcc-53c19c986606bf484095e57daddcf2e33da79cad.tar.gz
2013-10-20 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Remove virtual class _Automaton. * include/bits/regex_automaton.h: Likewise. * include/bits/regex.tcc: Adjust comment for policy changing. * include/bits/regex_executor.h: Update comments of complexity. * include/bits/regex_executor.tcc: Adjust executor choosing policy. Now DFS executor is the default one. * testsuite/util/testsuite_regex.h (regex_match_debug, regex_search_debug): Adjust for policy changing. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203875 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--libstdc++-v3/ChangeLog11
-rw-r--r--libstdc++-v3/include/bits/regex.h2
-rw-r--r--libstdc++-v3/include/bits/regex.tcc4
-rw-r--r--libstdc++-v3/include/bits/regex_automaton.h24
-rw-r--r--libstdc++-v3/include/bits/regex_executor.h36
-rw-r--r--libstdc++-v3/include/bits/regex_executor.tcc42
-rw-r--r--libstdc++-v3/testsuite/util/testsuite_regex.h4
7 files changed, 66 insertions, 57 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 7717da08e8a..0957953de2f 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,14 @@
+2013-10-20 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex.h: Remove virtual class _Automaton.
+ * include/bits/regex_automaton.h: Likewise.
+ * include/bits/regex.tcc: Adjust comment for policy changing.
+ * include/bits/regex_executor.h: Update comments of complexity.
+ * include/bits/regex_executor.tcc: Adjust executor choosing
+ policy. Now DFS executor is the default one.
+ * testsuite/util/testsuite_regex.h (regex_match_debug,
+ regex_search_debug): Adjust for policy changing.
+
2013-10-20 Chris Jefferson <chris@bubblescope.net>
Paolo Carlini <paolo.carlini@oracle.com>
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 5d1a8f4b71e..32d38b491bd 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -727,7 +727,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
#endif
protected:
- typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
+ typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
_AutomatonPtr;
template<typename _BiIter, typename _Alloc,
diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc
index b3b5314d744..7028480ed77 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -38,8 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Result of merging regex_match and regex_search.
//
- // __policy now can be _S_auto(auto dispatch by checking back-references)
- // and _S_force_dfs(just use _DFSExecutor).
+ // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
+ // the other one if possible, for test purpose).
//
// That __match_mode is true means regex_match, else regex_search.
template<typename _BiIter, typename _Alloc,
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index cb944990b49..4fb555680ba 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -104,31 +104,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
};
- /// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
- template<typename _CharT, typename _TraitsT>
- class _Automaton
- {
- public:
- typedef size_t _SizeT;
-
- public:
- virtual
- ~_Automaton()
- { }
-
- virtual _SizeT
- _M_sub_count() const = 0;
-
-#ifdef _GLIBCXX_DEBUG
- virtual std::ostream&
- _M_dot(std::ostream& __ostr) const = 0;
-#endif
- };
-
template<typename _CharT, typename _TraitsT>
class _NFA
- : public _Automaton<_CharT, _TraitsT>,
- public std::vector<_State<_CharT, _TraitsT>>
+ : public std::vector<_State<_CharT, _TraitsT>>
{
public:
typedef _State<_CharT, _TraitsT> _StateT;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 018b649a25a..6d9a83e8c5c 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -179,8 +179,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// TODO: This approach is exponentially slow for certain input.
// Try to compile the NFA to a DFA.
//
- // Time complexity: exponential
- // Space complexity: O(__end - __begin)
+ // Time complexity: o(match_length), O(2^(_M_nfa->size()))
+ // Space complexity: \theta(match_results.size() + match_length)
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
class _DFSExecutor
@@ -200,16 +200,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __re,
_FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
- (__re._M_automaton)),
- _M_start_state(_M_nfa._M_start())
+ _M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
{ }
private:
void
_M_init(_BiIter __cur)
{
- _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+ _M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
this->_M_current = __cur;
}
@@ -235,9 +233,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
// To record current solution.
- _ResultsVec _M_cur_results;
- const _NFAT& _M_nfa;
- _StateIdT _M_start_state;
+ std::shared_ptr<_NFAT> _M_nfa;
+ _ResultsVec _M_cur_results;
+ _StateIdT _M_start_state;
};
// Like the DFS approach, it try every possible state transition; Unlike DFS,
@@ -251,8 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// matching head. When states transit, solutions will be compared and
// deduplicated(based on which greedy mode we have).
//
- // Time complexity: O((__end - __begin) * _M_nfa.size())
- // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
+ // Time complexity: o(match_length * (quantifier_number
+ // + match_results.size())
+ // O(match_length * _M_nfa->size()
+ // * (quantifier_number + match_results.size())
+ // Space complexity: o(quantifier_number + match_results.size())
+ // O(_M_nfa->size()
+ // * (quantifier_number + match_results.size())
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
class _BFSExecutor
@@ -382,11 +385,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __re,
_FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
- (__re._M_automaton)),
- _M_match_stack(_M_nfa.size()),
- _M_stack(_M_nfa.size()),
- _M_start_state(_M_nfa._M_start())
+ _M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
+ _M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
{ }
private:
@@ -398,7 +398,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsVec& __res(this->_M_results);
_M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(),
- _M_nfa._M_quant_count));
+ _M_nfa->_M_quant_count));
_M_stack._M_push(this->_M_start_state);
}
@@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
this->_M_flags));
}
- const _NFAT& _M_nfa;
+ std::shared_ptr<_NFAT> _M_nfa;
std::map<_StateIdT, _ResultsPtr> _M_covered;
_TodoList _M_match_stack;
_TodoList _M_stack;
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index b8755736e60..7c084add031 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -28,6 +28,13 @@
* Do not attempt to use it directly. @headername{regex}
*/
+// See below __get_executor to get what this is talking about. The default
+// value 1 indicated a conservative optimization without giving up worst case
+// performance.
+#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
+#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
+#endif
+
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
@@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_dfs(_StateIdT __i)
{
auto& __current = this->_M_current;
- const auto& __state = _M_nfa[__i];
+ const auto& __state = (*_M_nfa)[__i];
bool __ret = false;
switch (__state._M_opcode)
{
@@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __u = _M_stack._M_pop();
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
- const auto& __state = _M_nfa[__u];
+ const auto& __state = (*_M_nfa)[__u];
// Can be implemented using method, but there will be too many
// arguments. I would use macro function before C++11, but lambda is
@@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
while (!_M_match_stack._M_empty())
{
auto __u = _M_match_stack._M_pop();
- const auto& __state = _M_nfa[__u];
+ const auto& __state = (*_M_nfa)[__u];
auto& __cu = _M_covered[__u];
if (__state._M_matches(*this->_M_current)
&& (__next.count(__state._M_next) == 0
@@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_includes_some()
{
bool __succ = false;
- for (auto __u : _M_nfa._M_final_states())
+ for (auto __u : _M_nfa->_M_final_states())
if (_M_covered.count(__u))
{
__succ = true;
@@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
enum class _RegexExecutorPolicy : int
- { _S_auto, _S_force_dfs };
+ { _S_auto, _S_alternate };
+ // This function decide which executor to use under given circumstances.
+ // The _S_auto policy now is the following: if a NFA has no back-references
+ // and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
+ // (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is
+ // because _DFSExecutor has a exponential upper bound, but better best-case
+ // performace. Meanwhile, _BFSExecutor can effectively prevent from
+ // exponential-long time matching (which must contains many quantifiers), but
+ // it's slower in average.
+ //
+ // For simple regex, _BFSExecutor could be 2 or more times slower than
+ // _DFSExecutor.
+ //
+ // Of course, _BFSExecutor cannot handle back-references.
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT,
_RegexExecutorPolicy __policy>
@@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ExecutorPtr;
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
- auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
- (__re._M_automaton);
- if (__policy == _RegexExecutorPolicy::_S_force_dfs
- || (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref))
- return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
- return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
+ if (!__re._M_automaton->_M_has_backref
+ && (__policy == _RegexExecutorPolicy::_S_alternate
+ || __re._M_automaton->_M_quant_count
+ > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
+ return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
+ return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
}
_GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/testsuite/util/testsuite_regex.h b/libstdc++-v3/testsuite/util/testsuite_regex.h
index 17be78d9c1b..bc0f3d5bfd9 100644
--- a/libstdc++-v3/testsuite/util/testsuite_regex.h
+++ b/libstdc++-v3/testsuite/util/testsuite_regex.h
@@ -148,7 +148,7 @@ namespace __gnu_test
(__s, __e, __m, __re, __flags);
match_results<_Bi_iter, _Alloc> __mm;
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
- _RegexExecutorPolicy::_S_force_dfs, true>
+ _RegexExecutorPolicy::_S_alternate, true>
(__s, __e, __mm, __re, __flags);
if (__res1 == __res2 && __m == __mm)
return __res1;
@@ -234,7 +234,7 @@ namespace __gnu_test
(__s, __e, __m, __re, __flags);
match_results<_Bi_iter, _Alloc> __mm;
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
- _RegexExecutorPolicy::_S_force_dfs, false>
+ _RegexExecutorPolicy::_S_alternate, false>
(__s, __e, __mm, __re, __flags);
if (__res1 == __res2 && __m == __mm)
return __res1;