diff options
author | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-09-02 22:20:56 +0000 |
---|---|---|
committer | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-09-02 22:20:56 +0000 |
commit | 094a5031d6ac73f3817104ed80bf80f2d8810b2c (patch) | |
tree | 35679ea7831ed6df022d4a35fa92b9b36166cb35 /libstdc++-v3/include | |
parent | 8b8cc022d08cfe154ff4dcfbeb4fc0cb41e11c45 (diff) | |
download | gcc-094a5031d6ac73f3817104ed80bf80f2d8810b2c.tar.gz |
2013-09-02 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.h: Rearrange _NFA's layout.
* include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher.
Rearrange _BracketMatcher's layout.
(_BracketMatcher<>::_M_add_char): Use set instead of vector for
_M_char_set.
(_BracketMatcher<>::_M_add_collating_element): Likewise.
(_BracketMatcher<>::_M_make_range): Likewise.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use
appropriate constructors of matchers above.
* testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc:
New.
* testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@202189 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libstdc++-v3/include')
-rw-r--r-- | libstdc++-v3/include/bits/regex_automaton.h | 4 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 88 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.tcc | 44 |
3 files changed, 81 insertions, 55 deletions
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index f9e9630636b..2c872aa9482 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -206,12 +206,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_dot(std::ostream& __ostr) const; #endif + std::vector<unsigned int> _M_paren_stack; + _StateSet _M_accepting_states; _FlagT _M_flags; _StateIdT _M_start_state; - _StateSet _M_accepting_states; _SizeT _M_subexpr_count; bool _M_has_backref; - std::vector<unsigned int> _M_paren_stack; }; /// Describes a sequence of one or more %_State, its current start diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index a1107bb7eeb..55ecdb92d41 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -125,12 +125,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _TraitsT& _M_traits; _ScannerT _M_scanner; - _StringT _M_value; _RegexT _M_state_store; + _StringT _M_value; _StackT _M_stack; _FlagT _M_flags; }; + template<typename _CharT, typename _TraitsT> + struct _AnyMatcher + { + explicit + _AnyMatcher(const _TraitsT& __traits) + : _M_traits(__traits) + { } + + bool + operator()(_CharT __ch) const + { + return _M_traits.translate(__ch) != '\n' + && _M_traits.translate(__ch) != '\r' + && _M_traits.translate(__ch) != u'\u2028' + && _M_traits.translate(__ch) != u'\u2029'; + } + + const _TraitsT& _M_traits; + }; + + template<typename _CharT, typename _TraitsT> + struct _CharMatcher + { + typedef regex_constants::syntax_option_type _FlagT; + + explicit + _CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags) + : _M_ch(_M_translate(__ch)), _M_traits(__traits), _M_flags(__flags) + { } + + bool + operator()(_CharT __ch) const + { return _M_ch == _M_translate(__ch); } + + _CharT + _M_translate(_CharT __ch) const + { + if (_M_flags & regex_constants::icase) + return 
_M_traits.translate_nocase(__ch); + else + return _M_traits.translate(__ch); + } + + const _TraitsT& _M_traits; + _FlagT _M_flags; + _CharT _M_ch; + }; + /// Matches a character range (bracket expression) template<typename _CharT, typename _TraitsT> struct _BracketMatcher @@ -141,9 +189,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION explicit _BracketMatcher(bool __is_non_matching, - const _TraitsT& __t, + const _TraitsT& __traits, _FlagT __flags) - : _M_is_non_matching(__is_non_matching), _M_traits(__t), + : _M_is_non_matching(__is_non_matching), _M_traits(__traits), _M_flags(__flags), _M_class_set(0) { } @@ -152,7 +200,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_add_char(_CharT __c) - { _M_char_set.push_back(_M_translate(__c)); } + { _M_char_set.insert(_M_translate(__c)); } void _M_add_collating_element(const _StringT& __s) @@ -162,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__st.empty()) __throw_regex_error(regex_constants::error_collate); // TODO: digraph - _M_char_set.push_back(__st[0]); + _M_char_set.insert(_M_translate(__st[0])); } void @@ -186,21 +234,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { - _M_range_set.push_back( - make_pair(_M_get_str(_M_translate(__l)), - _M_get_str(_M_translate(__r)))); + if (_M_flags & regex_constants::collate) + _M_range_set.insert( + make_pair(_M_get_str(_M_translate(__l)), + _M_get_str(_M_translate(__r)))); + else + _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r))); } _CharT _M_translate(_CharT __c) const { - if (_M_flags & regex_constants::collate) - if (_M_is_icase()) - return _M_traits.translate_nocase(__c); - else - return _M_traits.translate(__c); + if (_M_is_icase()) + return _M_traits.translate_nocase(__c); else - return __c; + return _M_traits.translate(__c); } bool @@ -214,12 +262,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return _M_traits.transform(__s.begin(), __s.end()); } - const _TraitsT& _M_traits; - _FlagT _M_flags; - bool _M_is_non_matching; - std::vector<_CharT> 
_M_char_set; - std::vector<pair<_StringT, _StringT>> _M_range_set; - _CharClassT _M_class_set; + std::set<_CharT> _M_char_set; + std::set<pair<_StringT, _StringT>> _M_range_set; + const _TraitsT& _M_traits; + _CharClassT _M_class_set; + _FlagT _M_flags; + bool _M_is_non_matching; }; //@} regex-detail diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index bed091a4486..e41b251c257 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -204,32 +204,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (_M_match_token(_ScannerT::_S_token_anychar)) { - const static auto& - __any_matcher = [](_CharT __ch) -> bool - { return true; }; - _M_stack.push(_StateSeqT(_M_state_store, _M_state_store._M_insert_matcher - (__any_matcher))); + (_AnyMatcher<_CharT, _TraitsT>(_M_traits)))); return true; } if (_M_try_char()) { - _CharT __c = _M_value[0]; - __detail::_Matcher<_CharT> f; - if (_M_flags & regex_constants::icase) - { - auto __traits = this->_M_traits; - __c = __traits.translate_nocase(__c); - f = [__traits, __c](_CharT __ch) -> bool - { return __traits.translate_nocase(__ch) == __c; }; - } - else - f = [__c](_CharT __ch) -> bool - { return __ch == __c; }; - _M_stack.push(_StateSeqT(_M_state_store, - _M_state_store._M_insert_matcher(f))); + _M_state_store._M_insert_matcher + (_CharMatcher<_CharT, _TraitsT>(_M_value[0], + _M_traits, + _M_flags)))); return true; } if (_M_match_token(_ScannerT::_S_token_backref)) @@ -374,26 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool __ret = false; if (_M_traits.isctype(__ch, _M_class_set)) __ret = true; + else if (_M_char_set.count(_M_translate(__ch))) + __ret = true; else { - __ch = _M_translate(__ch); - - for (auto __c : _M_char_set) - if (__c == __ch) + _StringT __s = _M_get_str(_M_flags & regex_constants::collate + ? 
_M_translate(__ch) : __ch); + for (auto& __it : _M_range_set) + if (__it.first <= __s && __s <= __it.second) { __ret = true; break; } - if (!__ret) - { - _StringT __s = _M_get_str(__ch); - for (auto& __it : _M_range_set) - if (__it.first <= __s && __s <= __it.second) - { - __ret = true; - break; - } - } } if (_M_is_non_matching) return !__ret; |