summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-07-24 14:39:54 +0000
committertimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-07-24 14:39:54 +0000
commit6d74a96b7de59131dac76fc8a0b05aa45114ed9f (patch)
treea4d363f2143c17d31818485bc16cce2febbdee66
parent47584ef2c0f361da0a8d491f0c43d93f71c3932c (diff)
downloadgcc-6d74a96b7de59131dac76fc8a0b05aa45114ed9f.tar.gz
2013-07-24 Tim Shen <timshen91@gmail.com>
Reimplment matcher using Depth-first search(backtracking). PR libstdc++/53622 PR libstdc++/57173 * include/bits/regex.h: regex_match() and regex_search(). * include/bits/regex_cursor.h: Fix _M_set_pos(). * include/bits/regex_grep_matcher.h: add _M_dfs_match(). * include/bits/regex_grep_matcher.tcc: Implement it. * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc: New. * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc: New. * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc: Remove xfail. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@201213 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--libstdc++-v3/ChangeLog16
-rw-r--r--libstdc++-v3/include/bits/regex.h6
-rw-r--r--libstdc++-v3/include/bits/regex_cursor.h5
-rw-r--r--libstdc++-v3/include/bits/regex_grep_matcher.h20
-rw-r--r--libstdc++-v3/include/bits/regex_grep_matcher.tcc50
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc52
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc50
-rw-r--r--libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc3
8 files changed, 193 insertions, 9 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index ab21abefed5..45dc9452afb 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,19 @@
+2013-07-24 Tim Shen <timshen91@gmail.com>
+
+ Reimplment matcher using Depth-first search(backtracking).
+ PR libstdc++/53622
+ PR libstdc++/57173
+ * include/bits/regex.h: regex_match() and regex_search().
+ * include/bits/regex_cursor.h: Fix _M_set_pos().
+ * include/bits/regex_grep_matcher.h: add _M_dfs_match().
+ * include/bits/regex_grep_matcher.tcc: Implement it.
+ * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
+ New.
+ * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
+ New.
+ * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
+ Remove xfail.
+
2013-07-23 Tim Shen <timshen91@gmail.com>
Implement regex_iterator and regex_token_iterator.
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 9848f717fc6..64a1807033b 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -2185,8 +2185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
__detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
- __matcher._M_match();
- return __m[0].matched;
+ return __matcher._M_dfs_match();
}
/**
@@ -2338,8 +2337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
__detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
- __matcher._M_search_from_first();
- if (__m[0].matched)
+ if (__matcher._M_dfs_search_from_first())
{
__r._M_set_range(__m.size(),
__detail::_SpecializedCursor<_Bi_iter>
diff --git a/libstdc++-v3/include/bits/regex_cursor.h b/libstdc++-v3/include/bits/regex_cursor.h
index 44e9d1d9239..444d07ae263 100644
--- a/libstdc++-v3/include/bits/regex_cursor.h
+++ b/libstdc++-v3/include/bits/regex_cursor.h
@@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
virtual ~_PatternCursor() { };
virtual void _M_next() = 0;
+ virtual void _M_prev() = 0;
virtual bool _M_at_end() const = 0;
};
@@ -66,6 +67,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_next()
{ ++_M_c; }
+ void
+ _M_prev()
+ { --_M_c; }
+
_FwdIterT
_M_pos() const
{ return _M_c; }
diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h
index da9264d5c17..c402c780f1c 100644
--- a/libstdc++-v3/include/bits/regex_grep_matcher.h
+++ b/libstdc++-v3/include/bits/regex_grep_matcher.h
@@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__j == 0)
_M_results.at(__i).first = __c._M_pos();
else
- _M_results.at(__i).second = __c._M_pos()+1;
+ _M_results.at(__i).second = __c._M_pos();
}
/// A stack of states used in evaluating the NFA.
@@ -127,9 +127,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_pattern(__p), _M_results(__r)
{ }
- void _M_match();
+ void
+ _M_match();
+
+ void
+ _M_search_from_first();
- void _M_search_from_first();
+ bool
+ _M_dfs_match()
+ { return _M_dfs<true>(_M_nfa->_M_start()); }
+
+ bool
+ _M_dfs_search_from_first()
+ { return _M_dfs<false>(_M_nfa->_M_start()); }
private:
_StateSet
@@ -141,6 +151,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSet
_M_e_closure(_StateStack& __stack, const _StateSet& __s);
+ template<bool __match_mode>
+ bool
+ _M_dfs(_StateIdT __i);
+
const std::shared_ptr<_Nfa> _M_nfa;
_PatternCursor& _M_pattern;
_Results& _M_results;
diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc
index 2d4a9a60faa..46b98174888 100644
--- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc
+++ b/libstdc++-v3/include/bits/regex_grep_matcher.tcc
@@ -103,6 +103,56 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ // _M_dfs() take a state, along with current string cursor(_M_pattern),
+ // trying to match current state with current character.
+ // Only _S_opcode_match will consume a character.
+ // TODO: This is too slow. Try to compile the NFA to a DFA.
+ template<bool __match_mode>
+ bool _Grep_matcher::
+ _M_dfs(_StateIdT __i)
+ {
+ if (__i == _S_invalid_state_id)
+ // This is not that certain. Need deeper investigate.
+ return false;
+ const auto& __state = (*_M_nfa)[__i];
+ bool __ret = false;
+ switch (__state._M_opcode)
+ {
+ case _S_opcode_alternative:
+ // Greedy mode by default. For non-greedy mode,
+ // swap _M_alt and _M_next.
+ __ret = _M_dfs<__match_mode>(__state._M_alt)
+ || _M_dfs<__match_mode>(__state._M_next);
+ break;
+ case _S_opcode_subexpr_begin:
+ __state._M_tagger(_M_pattern, _M_results);
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ break;
+ case _S_opcode_subexpr_end:
+ __state._M_tagger(_M_pattern, _M_results);
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ _M_results._M_set_matched(__state._M_subexpr, __ret);
+ break;
+ case _S_opcode_match:
+ if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
+ {
+ _M_pattern._M_next();
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ _M_pattern._M_prev();
+ }
+ break;
+ case _S_opcode_accept:
+ if (__match_mode)
+ __ret = _M_pattern._M_at_end();
+ else
+ __ret = true;
+ break;
+ default:
+ _GLIBCXX_DEBUG_ASSERT( false );
+ }
+ return __ret;
+ }
+
inline void _Grep_matcher::
_M_match()
{
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
new file mode 100644
index 00000000000..383ed054a90
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
@@ -0,0 +1,52 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-23 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended grouping against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+// libstdc++/53622
+void
+test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ std::regex re("zxcv/(one.*)abc", std::regex::extended);
+ std::string target("zxcv/onetwoabc");
+ std::smatch m;
+
+ VERIFY( std::regex_search(target, m, re) );
+ VERIFY( m.size() == 2 );
+ VERIFY( m[0].matched == true );
+ VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+ VERIFY( m[1].matched == true );
+ VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+}
+
+int
+main()
+{
+ test01();
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
new file mode 100644
index 00000000000..3031c43d188
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
@@ -0,0 +1,50 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-23 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests Extended against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <iostream>
+
+// libstdc++/57173
+void
+test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ std::regex re("/asdf(/.*)", std::regex::extended);
+ std::string target("/asdf/qwerty");
+ std::smatch m;
+
+ VERIFY( std::regex_match(target, m, re) );
+ VERIFY( m.size() == 2 );
+ VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+}
+
+int
+main()
+{
+ test01();
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc
index f2a7f2104c1..ee487f19836 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc
@@ -1,5 +1,4 @@
// { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
//
// 2013-07-17 Tim Shen <timshen91@gmail.com>
@@ -32,7 +31,7 @@ test01()
{
bool test __attribute__((unused)) = true;
- std::regex re("as(df)", std::regex::basic);
+ std::regex re("as\\(df\\)", std::regex::basic);
std::string target("xxasdfyy");
std::smatch m;