summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/mbstring/config.m425
-rw-r--r--ext/mbstring/mbstring.c8
-rw-r--r--ext/mbstring/oniguruma/HISTORY360
-rw-r--r--ext/mbstring/oniguruma/INSTALL-RUBY48
-rw-r--r--ext/mbstring/oniguruma/Makefile.in188
-rw-r--r--ext/mbstring/oniguruma/README141
-rwxr-xr-xext/mbstring/oniguruma/configure5414
-rw-r--r--ext/mbstring/oniguruma/configure.in70
-rw-r--r--ext/mbstring/oniguruma/doc/API279
-rw-r--r--ext/mbstring/oniguruma/doc/RE224
-rw-r--r--ext/mbstring/oniguruma/onigcmpt200.h304
-rw-r--r--ext/mbstring/oniguruma/oniggnu.h77
-rw-r--r--ext/mbstring/oniguruma/onigposix.h93
-rw-r--r--ext/mbstring/oniguruma/oniguruma.h924
-rw-r--r--ext/mbstring/oniguruma/php_compat.h39
-rw-r--r--ext/mbstring/oniguruma/re.c.168.patch56
-rw-r--r--ext/mbstring/oniguruma/re.c.180.patch66
-rw-r--r--ext/mbstring/oniguruma/regcomp.c1484
-rw-r--r--ext/mbstring/oniguruma/regenc.c586
-rw-r--r--ext/mbstring/oniguruma/regenc.h97
-rw-r--r--ext/mbstring/oniguruma/regerror.c176
-rw-r--r--ext/mbstring/oniguruma/regex.c14
-rw-r--r--ext/mbstring/oniguruma/regexec.c1464
-rw-r--r--ext/mbstring/oniguruma/reggnu.c93
-rw-r--r--ext/mbstring/oniguruma/regint.h371
-rw-r--r--ext/mbstring/oniguruma/regparse.c2629
-rw-r--r--ext/mbstring/oniguruma/regparse.h90
-rw-r--r--ext/mbstring/oniguruma/regposerr.c2
-rw-r--r--ext/mbstring/oniguruma/regposix.c211
-rw-r--r--ext/mbstring/oniguruma/sample/names.c64
-rw-r--r--ext/mbstring/oniguruma/sample/posix.c92
-rw-r--r--ext/mbstring/oniguruma/sample/simple.c54
-rw-r--r--ext/mbstring/oniguruma/test.rb971
-rw-r--r--ext/mbstring/oniguruma/testc.c (renamed from ext/mbstring/oniguruma/win32/testc.c)665
-rw-r--r--ext/mbstring/oniguruma/testconv.rb223
-rw-r--r--ext/mbstring/oniguruma/win32/Makefile131
-rw-r--r--ext/mbstring/oniguruma/win32/config.h84
-rw-r--r--ext/mbstring/php_mbregex.c357
-rw-r--r--ext/mbstring/php_mbregex.h16
39 files changed, 12030 insertions, 6160 deletions
diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4
index 9cff51897f..ba76165bfc 100644
--- a/ext/mbstring/config.m4
+++ b/ext/mbstring/config.m4
@@ -80,7 +80,32 @@ int main() { return foo(10, "", 3.14); }
oniguruma/regexec.c
oniguruma/reggnu.c
oniguruma/regparse.c
+ oniguruma/regenc.c
oniguruma/regposerr.c
+ oniguruma/enc/ascii.c
+ oniguruma/enc/utf8.c
+ oniguruma/enc/euc_jp.c
+ oniguruma/enc/euc_tw.c
+ oniguruma/enc/euc_kr.c
+ oniguruma/enc/sjis.c
+ oniguruma/enc/iso8859_1.c
+ oniguruma/enc/iso8859_2.c
+ oniguruma/enc/iso8859_3.c
+ oniguruma/enc/iso8859_4.c
+ oniguruma/enc/iso8859_5.c
+ oniguruma/enc/iso8859_6.c
+ oniguruma/enc/iso8859_7.c
+ oniguruma/enc/iso8859_8.c
+ oniguruma/enc/iso8859_9.c
+ oniguruma/enc/iso8859_10.c
+ oniguruma/enc/iso8859_11.c
+ oniguruma/enc/iso8859_13.c
+ oniguruma/enc/iso8859_14.c
+ oniguruma/enc/iso8859_15.c
+ oniguruma/enc/iso8859_16.c
+ oniguruma/enc/koi8.c
+ oniguruma/enc/koi8_r.c
+ oniguruma/enc/big5.c
])
fi
])
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 2a1944db83..d0fb5db869 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -610,10 +610,10 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
MBSTRG(current_internal_encoding) = no_encoding;
#if HAVE_MBREGEX
{
- php_mb_reg_char_encoding mbctype;
+ OnigEncoding mbctype;
mbctype = php_mb_regex_name2mbctype(new_value);
- if (mbctype == REGCODE_UNDEF) {
- mbctype = REGCODE_EUCJP;
+ if (mbctype == ONIG_ENCODING_UNDEF) {
+ mbctype = ONIG_ENCODING_EUC_JP;
}
MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype;
}
@@ -995,7 +995,7 @@ PHP_MINFO_FUNCTION(mbstring)
php_info_print_table_end();
php_info_print_table_start();
- php_info_print_table_colspan_header(2, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
+ php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
php_info_print_table_end();
DISPLAY_INI_ENTRIES();
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
index d9627fced7..0373fc894f 100644
--- a/ext/mbstring/oniguruma/HISTORY
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -1,11 +1,361 @@
History
+2004/02/27: Version 2.2.2
+
+2004/02/27: [impl] fix the position of onig_stat_print().
+2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h
+ so that it is ignored by RDoc.
+
+2004/02/26: Version 2.2.1
+
+2004/02/26: [bug] invalid definition at onig_error_code_to_str()
+ in the case of NOT HAVE_STDARG_PROTOTYPES.
+
+2004/02/25: Version 2.2.0
+
+2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
+2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() were used in
+ onigenc_is_code_ctype() in the case of Ruby M17N.
+2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16)
+2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c.
+2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14)
+2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13)
+2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10)
+2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig().
+2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9)
+2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R.
+2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex.
+2004/02/17: [spec] check capture status for empty infinite loop.
+ [ruby-dev:20224] etc...
+ ex. /(?:\1a|())*/.match("a"),
+ /(?:()|()|()|(x)|()|())*\2b\5/.match("b")
+ add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK.
+ add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH.
+ add stack type STK_NULL_CHECK_END.
+2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c.
+2004/02/13: [bug] (thanks Simon Strandgaard)
+ parsing of nested repeat was invalid.
+ ex. /ab{2,3}*/ was /(?:a(?:b{2,3}))*/,
+ should be /a(?:b{2,3}*)/
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ OP_REPEAT_INC_NG process in match_at() is wrong.
+ ex. bad match /a.{0,2}?a/ =~ "0aXXXa0"
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ wrong fetch after (?x) option. ex. "(?x)\ta .\n+b"
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ [\^] is not an empty char class.
+2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(),
+ onig_set_syntax_behavior(), onig_set_syntax_options().
+2004/02/06: [dist] add a new target 'site' to Makefile.in.
+2004/02/06: [dist] add index.html.
+2004/02/03: [bug] oniggnu.h was not installed by 'make install'.
+
+2004/02/02: Version 2.1.0
+
+2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux].
+2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11)
+2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5)
+2004/02/02: [impl] should check single byte encoding or not in and_cclass()
+ and or_cclass().
+2004/01/30: [dist] add oniggnu.h.
+2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type.
+2004/01/30: [new] support ISO-8859-8. (ONIG_ENCODING_ISO_8859_8)
+2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7)
+2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6)
+2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R)
+2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8)
+2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c.
+2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4)
+2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3)
+2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong.
+ (0x80 - 0xff is not ASCII)
+2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2)
+2004/01/23: [dist] add enc/isotable.c.
+2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
+2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and
+ ALIGNMENT_RIGHT() was wrong.
+ type casting should be unsigned int, not int.
+2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__)
+ to unaligned word access condition. (AMD64 ?)
+2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c.
+2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
+2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/01/20: [dist] change Makefile.in.
+2004/01/20: [spec] add \p{...}, \P{...} in char class.
+2004/01/20: [new] character property operators \p{...}, \P{...}.
+ supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL.
+2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/)
+2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h.
+2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch.
+2004/01/16: [dist] update sample/*.c for new API.
+2004/01/16: [dist] add onigcomp200.h. (for old API compatibility)
+2004/01/16: [dist] update documents API, RE and RE.ja.
+2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ -> onig_,
+ ENC_ -> ONIGENC, enc_ -> onigenc_.
+2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD().
+ rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to
+ ENC_CTYPE_SUPPORT_LEVEL_SB.
+2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to
+ PLATFORM_UNALIGNED_WORD_ACCESS.
+2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000.
+2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h.
+ remove code_to_mbc_first member in RegCharEncodingType.
+2004/01/13: [impl] remove head byte bitset information in cclass->mbuf.
+2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for
+ escape conflict.
+
+2003/12/24: Version 2.0.0
+
+2003/12/24: [spec] ignore case option is effective to numbered char.
+ ex. /\x61/i =~ "A"
+2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux].
+2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2003/12/24: [test] success in regex.c compile test on ruby-m17n.
+ (but can't make miniruby because re.c patch fail.)
+2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5.
+2003/12/22: [spec] implement fold match on UTF-8 encoding.
+2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to
+ RegCharEncoding type.
+2003/12/19: [impl] add add_ctype_to_cc() in regparse.c.
+2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case.
+2003/12/19: [impl] change ENC_CODE_TO_MBC() interface.
+2003/12/18: [new] implement fold match. (variable number of char
+ match in ignore case mode.)
+ ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss".
+2003/12/17: [impl] refactoring of encoding system.
+2003/12/17: [impl] add enc_init() in regenc.c.
+2003/12/17: [new] support Big5. (REG_ENCODING_BIG5)
+2003/12/16: [impl] change CodePoint from unsigned int to unsigned long.
+2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15)
+2003/12/16: [impl] change P_() macro definition condition for Win32.
+2003/12/16: [dist] add sample/encode.c
+2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1)
+2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX.
+2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding.
+2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...)
+2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern().
+ change local var. type char* to UChar*.
+2003/12/15: [impl] remove REG_MBLEN_TABLE[].
+2003/12/15: [spec] rename function prefix regex_get_prev_char_head(),
+ regex_get_left_adjust_char_head() and
+ regex_get_right_adjust_char_head() to enc_xxxxxx().
+2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_.
+2003/12/12: [impl] remove USE_SBMB_CLASS.
+2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len().
+2003/12/12: [impl] rename WCINT to CodePoint.
+2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h.
+2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256].
+2003/12/11: [impl] remove RegAsciiCaseAmbigTable.
+2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison.
+2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. ***
+2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h.
+2003/12/08: [impl] add IS_ENC_WORD() to regenc.h.
+2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX().
+2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h.
+2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF.
+2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND |
+ REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF)
+ to RegSyntaxGrep.
+2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT.
+2003/12/04: [spec] move declarations of regex_get_default_encoding() and
+ regex_set_default_encoding() from oniguruma.h to regenc.h.
+2003/12/03: [new] add regex_get_default_encoding() and
+ regex_set_default_encoding().
+2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed.
+ (current default value, not initial default value.)
+2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX.
+2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"'
+2003/12/02: [spec] move regex_set_default_trans_table() declaration
+ from oniguruma.h to regenc.h. (obsoleted API)
+2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and
+ AmbiguityTable to regenc.c.
+2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c.
+2003/12/01: [dist] add regenc.h and regenc.c.
+2003/11/18: [dist] change testconv.rb.
+2003/11/18: [bug] (thanks Masaru Tsuda)
+ memory leak in parse_subexp().
+2003/11/18: [bug] (thanks Masaru Tsuda)
+ memory leak in names_clear() and parse_char_class().
+2003/11/17: [bug] memory leak in parse_char_class().
+2003/11/17: [bug] (thanks Masaru Tsuda)
+ OptExactInfo length should not over OPT_EXACT_MAXLEN.
+ (concat_opt_exact_info_str())
+
+2003/11/12: Version 1.9.5
+
+2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux].
+2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR.
+2003/11/11: [dist] add a sample program sample/sql.c.
+2003/11/11: [new] add variable meta character.
+ regex_set_meta_char()
+2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS.
+2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to
+ REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE,
+ REG_SYN_OP_QMARK_GROUP_EFFECT to
+ REG_SYN_OP2_QMARK_GROUP_EFFECT.
+2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode.
+2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT.
+2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to
+ REG_SYN_WARN_CC_OP_NOT_ESCAPED.
+2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func().
+2003/10/30: [new] add regex_name_to_backref_number().
+ (for multiplex definition name, see sample/names.c)
+2003/10/30: [spec] add name_end and reg argument to callback function of
+ regex_foreach_name(). (see sample/names.c)
+2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME.
+ add error code REGERR_MULTIPLEX_DEFINED_NAME.
+2003/10/14: [dist] modify sample/simple.c.
+2003/10/03: [bug] (thanks nobu) [ruby-dev:21472]
+ sub-anchor of optimization map info was wrong
+ in concat_left_node_opt_info().
+ ex. /^(x?y)/ = "xy" fail.
+
+2003/09/17: Version 1.9.4
+
+2003/09/17: [spec] change specification of char-class range in ignore case mode
+ follows with Ruby 1.8(2003-09-17).
+ ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i
+ ==> H-Z, h-z, 0x5b-0x60, a-c, A-C
+2003/09/16: [bug] (thanks Guy Decoux)
+ remove env->option == option check in parse_effect().
+ change env->option for dynamic option in parse_exp().
+ (ex. bad match /(?i)(?-i)a/ =~ "A")
+2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to
+ REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC,
+ REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC.
+2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR.
+2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to
+ REG_SYN_OP2_ESC_CAPITAL_M_BAR_META,
+ REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE,
+ REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP,
+ REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR,
+ REG_SYN_OP_ESC_GNU_BUF_ANCHOR to
+ REG_SYN_OP2_ESC_GNU_BUF_ANCHOR,
+ REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS,
+ REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD,
+ REG_SYN_OP_ESC_WORD_BEGIN_END to
+ REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END,
+ REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND,
+ REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE,
+ REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT,
+ REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC,
+ REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP,
+ REG_SYN_CONTEXT_INDEP_OPS to
+ REG_SYN_CONTEXT_INDEP_REPEAT_OPS,
+ REG_SYN_CONTEXT_INVALID_REPEAT_OPS to
+ REG_SYN_CONTEXT_INVALID_REPEAT_OPS.
+ add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR.
+2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR,
+ REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF,
+ REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF,
+ REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF,
+ REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF,
+ REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE,
+ REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE,
+ REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL,
+ REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL,
+ REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP,
+ REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT,
+ REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT,
+ REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY,
+ REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT,
+ REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to
+ REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL},
+ REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL,
+ REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP
+ and REG_SYN_OP2_ESC_K_NAMED_BACKREF.
+2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for
+ no-name group in noname_disable_map().
+ ex. /(a+)*(?<name>...)/
+2003/09/02: [impl] forgot to erase #include <stdio.h> in regcomp.c.
+2003/09/01: [dist] update doc/RE and doc/RE.ja.
+2003/08/26: [bug] (thanks Guy Decoux)
+ should not double free node at the case TK_CC_CC_OPEN
+ in parse_char_class().
+
+2003/08/19: Version 1.9.3
+
+2003/08/19: [inst] change re.c.180.patch.
+2003/08/19: [impl] rename 'list of captures' to 'capture history'.
+2003/08/19: [dist] add doc/RE.ja. (Japanese)
+2003/08/19: [new] add regex_copy_syntax().
+2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to
+ REG_SYN_OP2_ATMARK_CAPTURE_HISTORY.
+2003/08/18: [spec] (thanks nobu)
+ don't use IMPORT in oniguruma.h and onigposix.h.
+2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb.
+2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in.
+2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1.
+2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0.
+2003/08/18: [bug] (thanks nobu)
+ single/multi-byte decision was wrong in parse_char_class().
+ add regex_wc2mblen().
+ should not set fetched to 1 in TK_RAW_BYTE case.
+2003/08/18: [bug] should update BitSet in the case inc_n >= 0
+ in add_wc_range_to_buf().
+2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c.
+2003/08/11: [bug] should clear region->list in regex_region_resize().
+
+2003/08/08: Version 1.9.2
+
+2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000
+ VC++ 6.0 and Cygwin.
+2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform,
+ because definition is added in win32\win32.h.
+2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux.
+2003/08/08: [dist] change re.c.180.patch and re.c.168.patch.
+2003/08/08: [new] (thanks akr)
+ implemented list of captures. (?@...), (?@<name>...)
+2003/08/07: [dist] add sample/listcap.c.
+2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at().
+ renewal of mem_start_stk[] should be after
+ STACK_PUSH_MEM_END() call.
+2003/07/29: [new] add regex_get_encoding(), regex_get_options() and
+ regex_get_syntax().
+2003/07/25: [spec] (thanks akr)
+ change group(...) to shy-group(?:...) if named group is
+ used in the pattern.
+ add REG_SYN_CAPTURE_ONLY_NAMED_GROUP.
+2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to
+ REG_OPTION_DONT_CAPTURE_GROUP.
+ add REG_OPTION_CAPTURE_GROUP.
+2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP.
+2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME.
+2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME
+ to REGERR_INVALID_CHAR_IN_GROUP_NAME.
+2003/07/17: [spec] restrict usable chars of group name to alphabet, digit,
+ '_' or multibyte-char in fetch_name(). [ruby-dev:20706]
+2003/07/16: [impl] minor change of sample/names.c.
+2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP.
+2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case.
+2003/07/14: [API] add regex_number_of_names().
+2003/07/08: [impl] change error message for undefined group number call.
+ 'undefined group reference: /(a)\g<2>/'
+ --> 'undefined group <2> reference: /(a)\g<2>/'
+2003/07/08: [dist] modify doc/RE.
+2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code.
+ add IS_DYNAMIC_OPTION() to regint.h.
+2003/07/07: [spec] called group should not ignore outside option (?i:...).
+ ex. /(?i:(?<n>(a)\2)){0}\g<n>/.match("aA")
+ add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC.
+ set option status to effect memory in optimize_node_left().
+2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and
+ OP_ANYCHAR_ML_START_PEEK_NEXT.
+2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1.
+2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE.
+
2003/07/04: Version 1.9.1
2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET)
2003/07/04: [spec] check mbuf member in the case of
REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class().
-2003/07/04: [impl] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
+2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED.
2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and
REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu)
@@ -39,7 +389,7 @@ History
2003/06/20: Version 1.9.0
-2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY.
+2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. (thanks akr)
2003/06/19: [inst] rename regex.h to oniguruma.h.
2003/06/18: [impl] change REG_EXTERN setting condition. (__CYGWIN__)
2003/06/18: [bug] return wrong result UTF-8 case in regex_mb2wc().
@@ -55,7 +405,7 @@ History
2003/06/13: [bug] should use -DIMPORT for link with DLL in win32/Makefile.
2003/06/13: [dist] add sample/names.c
2003/06/12: [bug] range should be from - 1 in not_wc_range_buf().
-2003/06/12: [spec] should warn for '-' befor '&&' operator in char-class.
+2003/06/12: [spec] should warn for '-' before '&&' operator in char-class.
2003/06/12: [new] add REG_SYNTAX_PERL.
2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]]
@@ -507,6 +857,7 @@ History
--
[bug: bug fix]
+[API: API change/new/delete]
[new: new feature]
[spec: specification change]
[impl: implementation change]
@@ -515,3 +866,6 @@ History
[dist: distribution change]
[test: test]
[memo: memo]
+--
+<create tag>
+svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
diff --git a/ext/mbstring/oniguruma/INSTALL-RUBY b/ext/mbstring/oniguruma/INSTALL-RUBY
deleted file mode 100644
index ea214b6127..0000000000
--- a/ext/mbstring/oniguruma/INSTALL-RUBY
+++ /dev/null
@@ -1,48 +0,0 @@
-INSTALL-RUBY 2003/06/12
-
-The way of installing into Ruby is shown.
-First, prepare for the source of Ruby.
-(http://www.ruby-lang.org/)
-
-A. Unix or Cygwin platform
-B. Win32 platform (VC++)
-
-
-A. Unix or Cygwin platform
-
- (in oniguruma directory)
- 1. ./configure --with-rubydir=<ruby-source-dir>
- 2. make 16 # for Ruby 1.6.8
- or
- make 18 # for Ruby 1.8.0
-
- Or you can specify ruby source directory.
- (ex. make 16 RUBYDIR=../ruby)
-
- (in ruby source directory)
- 3. ./configure (** If it doesn't go yet. **)
- 4. make clean
- 5. make
-
-
- * test (ASCII and EUC-JP)
-
- (in oniguruma directory)
- 6. make rtest
- Or you can specify ruby program directory.
- (ex. make rtest RUBYDIR=/usr/local/bin)
-
-
-B. Win32 platform (VC++)
-
- * Requirement: Visual C++, patch.exe
-
- (in oniguruma directory)
- 1. copy win32\Makefile Makefile
- 2. nmake 16 RUBYDIR=<ruby-source-dir> # for Ruby 1.6.8
- or
- nmake 18 RUBYDIR=<ruby-source-dir> # for Ruby 1.8.0
-
- 3. Follow <ruby-source-dir>\win32\README.win32 description...
-
-// END
diff --git a/ext/mbstring/oniguruma/Makefile.in b/ext/mbstring/oniguruma/Makefile.in
deleted file mode 100644
index fd79cfb24f..0000000000
--- a/ext/mbstring/oniguruma/Makefile.in
+++ /dev/null
@@ -1,188 +0,0 @@
-# Oni Guruma Makefile
-
-product_name = oniguruma
-dist_tag = `date '+%Y%m%d'`
-
-SHELL = /bin/sh
-AUTOCONF = autoconf
-
-CPPFLAGS =
-CFLAGS = @CFLAGS@ @STATISTICS@
-LDFLAGS =
-LOADLIBES =
-AR = ar
-ARFLAGS = rc
-RANLIB = @RANLIB@
-INSTALL = install -c
-CP = cp -p
-CC = @CC@
-DEFS = @DEFS@ -DNOT_RUBY
-RUBYDIR = @RUBYDIR@
-WIN32 = win32
-DOC = doc
-
-srcdir = @srcdir@
-VPATH = @srcdir@
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-libdir = $(exec_prefix)/lib
-includedir = $(prefix)/include
-
-subdirs =
-
-libname = libonig.a
-
-onigintheaders = regint.h regparse.h
-onigheaders = oniguruma.h $(onigintheaders)
-posixheaders = onigposix.h
-headers = $(posixheaders) $(onigheaders)
-
-onigobjs = regerror.o regparse.o regcomp.o regexec.o reggnu.o
-posixobjs = regposix.o regposerr.o
-libobjs = $(onigobjs) $(posixobjs)
-
-onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c
-posixsources = regposix.c regposerr.c
-libsources = $(posixsources) $(onigsources)
-rubysources = regex.c $(onigsources)
-
-patchfiles = re.c.168.patch re.c.180.patch
-distfiles = README COPYING INSTALL-RUBY HISTORY \
- .cvsignore Makefile.in configure.in config.h.in configure \
- $(headers) $(libsources) regex.c $(patchfiles) \
- test.rb testconv.rb $(testc).c
-win32distfiles = $(WIN32)/Makefile $(WIN32)/config.h $(WIN32)/testc.c
-docfiles = $(DOC)/API $(DOC)/RE
-
-samplefiles = sample/*.c
-
-testc = testc
-testp = testp
-
-makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
-
-.SUFFIXES:
-.SUFFIXES: .o .c .h .ps .dvi .info .texinfo
-
-.c.o:
- $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. -I$(srcdir) -c $<
-
-# targets
-default: all
-
-all: $(libname)
-
-$(libname): $(libobjs)
- rm -f $(libname)
- $(AR) $(ARFLAGS) $(libname) $(libobjs)
- $(RANLIB) $(libname)
-
-regparse.o: regparse.c $(onigheaders) config.h
-regcomp.o: regcomp.c $(onigheaders) config.h
-regexec.o: regexec.c regint.h oniguruma.h config.h
-reggnu.o: reggnu.c regint.h oniguruma.h config.h
-regerror.o: regerror.c regint.h oniguruma.h config.h
-regposix.o: regposix.c $(posixheaders) oniguruma.h config.h
-regposerr.o: regposerr.c $(posixheaders) config.h
-
-install: all
- test -d $(libdir) || mkdir $(libdir)
- test -d $(includedir) || mkdir $(includedir)
- $(INSTALL) $(libname) $(libdir)/$(libname)
- $(RANLIB) $(libdir)/$(libname)
- $(INSTALL) $(srcdir)/oniguruma.h $(includedir)/oniguruma.h
- $(INSTALL) $(srcdir)/onigposix.h $(includedir)/onigposix.h
-
-uninstall:
- -rm -f $(libdir)/$(libname)
- -rm -f $(includedir)/oniguruma.h
-
-# Ruby test
-rtest:
- $(RUBYDIR)/ruby -w -Ke test.rb
-
-# C library test
-ctest: $(testc)
- ./$(testc)
-
-# POSIX C library test
-ptest: $(testp)
- ./$(testp)
-
-$(testc): $(testc).c $(libname)
- $(CC) $(CFLAGS) -o $@ $(testc).c $(libname)
-
-$(testp): $(testc).c $(libname)
- $(CC) -DPOSIX_TEST $(CFLAGS) -o $@ $(testc).c $(libname)
-
-$(testc).c: test.rb testconv.rb
- ruby -Ke testconv.rb < test.rb > $@
-
-$(WIN32)/$(testc).c: test.rb testconv.rb
- ruby -Ke testconv.rb -win < test.rb | nkf -cs > $@
-
-clean:
- rm -f *.o $(libname) $(testc) $(testp) $(testc) *~ win32/*~
-
-distclean: clean
- rm -f Makefile config.status
-
-
-16: cpruby
- patch -d $(RUBYDIR) -p0 < re.c.168.patch
-
-18: cpruby
- patch -d $(RUBYDIR) -p0 < re.c.180.patch
-
-# backup file suffix
-SORIG = ruby_orig
-
-cpruby:
- $(CP) $(RUBYDIR)/regex.c $(RUBYDIR)/regex.c.$(SORIG)
- $(CP) $(RUBYDIR)/regex.h $(RUBYDIR)/regex.h.$(SORIG)
- $(CP) $(RUBYDIR)/re.c $(RUBYDIR)/re.c.$(SORIG)
- $(CP) $(rubysources) $(onigintheaders) $(RUBYDIR)
- $(CP) oniguruma.h $(RUBYDIR)/regex.h
-
-rback:
- $(CP) $(RUBYDIR)/regex.c.$(SORIG) $(RUBYDIR)/regex.c
- $(CP) $(RUBYDIR)/regex.h.$(SORIG) $(RUBYDIR)/regex.h
- $(CP) $(RUBYDIR)/re.c.$(SORIG) $(RUBYDIR)/re.c
-
-samples:
- $(CC) $(CFLAGS) -I. -o sample/simple sample/simple.c $(libname)
- $(CC) $(CFLAGS) -I. -o sample/posix sample/posix.c $(libname)
- $(CC) $(CFLAGS) -I. -o sample/names sample/names.c $(libname)
-
-configure: configure.in
- $(AUTOCONF)
-
-config.status: configure
- $(SHELL) ./config.status --recheck
-
-Makefile: Makefile.in config.status
- $(SHELL) ./config.status
-
-# Prevent GNU make 3 from overflowing arg limit on system V.
-.NOEXPORT:
-
-manifest:
- for file in $(distfiles); do echo $$file; done
-
-
-distdir = $(product_name)
-
-dist_auto: $(testc).c $(WIN32)/$(testc).c
-
-dist: configure dist_auto
- rm -rf $(distdir)
- mkdir $(distdir)
- mkdir $(distdir)/$(DOC)
- mkdir $(distdir)/$(WIN32)
- mkdir $(distdir)/sample
- ln $(distfiles) $(distdir)
- ln $(docfiles) $(distdir)/$(DOC)
- ln $(win32distfiles) $(distdir)/$(WIN32)
- ln $(samplefiles) $(distdir)/sample
- tar chf - $(distdir) | gzip > onigd$(dist_tag).tar.gz
- rm -rf $(distdir)
diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README
index 130a6f7aa4..3880423f03 100644
--- a/ext/mbstring/oniguruma/README
+++ b/ext/mbstring/oniguruma/README
@@ -1,30 +1,43 @@
-README 2003/07/04
+README 2004/02/25
Oniguruma ---- (C) K.Kosako <kosako@sofnec.co.jp>
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
+http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
-Oniguruma is a regular expression library.
-The characteristics of this library is that different character encodings
+Oniguruma is a regular expressions library.
+The characteristic of this library is that a different character encoding
for every regular expression object can be specified.
-(Supported character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
+
+Supported character encodings:
+
+ ASCII, UTF-8,
+ EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+ Shift_JIS, Big5, KOI8, KOI8-R,
+ ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+
There are two ways of using of it in this program.
* Built-in regular expression engine of Ruby
* C library (supported APIs: GNU regex, POSIX, Oniguruma native)
+------------------------------------------------------------
Install
-A. Install into Ruby
+(A) Install into Ruby
See INSTALL-RUBY.
+ (character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
+
-B. C library
+(B) Install C library
- B1. Unix, Cygwin
+ (B-1) Unix and Cygwin platform
1. ./configure
2. make
@@ -32,20 +45,20 @@ B. C library
(* uninstall: make uninstall)
- * test (EUC-JP)
+ * test (ASCII/EUC-JP)
4. make ctest
- B2. Win32 platform (VC++)
+ (B-2) Win32 platform (VC++)
- 1. copy win32\config.h config.h
- 2. copy win32\Makefile Makefile
+ 1. copy win32\Makefile Makefile
+ 2. copy win32\config.h config.h
3. nmake
- onig_s.lib: static link library
- onig.dll: dynamic link library
+ onig_s.lib: static link library
+ onig.dll: dynamic link library
- * test (Shift_JIS)
+ * test (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
@@ -58,30 +71,72 @@ License
It follows the BSD license in the case of the one except for it.
-Source Files
- oniguruma.h Oniguruma and GNU regex API header file
- regint.h internal definitions
- regparse.h internal definitions for regparse.c and regcomp.c
- regparse.c parsing functions.
- regcomp.c compiling and optimization functions
- regerror.c error message function
- regex.c source files wrapper for Ruby
- regexec.c search and match functions
- reggnu.c GNU regex API functions
+Regular Expressions
+
+ See doc/RE (or doc/RE.ja for Japanese).
+
- onigposix.h POSIX API header file
- regposerr.c POSIX API error message function (regerror)
- regposix.c POSIX API functions
+Sample Programs
sample/simple.c example of the minimum (native API)
- sample/posix.c POSIX API sample.
sample/names.c example of the named group callback.
+ sample/encode.c example of some encodings.
+ sample/listcap.c example of the capture history.
+ sample/posix.c POSIX API sample.
+ sample/sql.c example of the variable meta characters.
+ (SQL-like pattern matching)
+
+Source Files
-Regular expression
+ oniguruma.h Oniguruma API header file. (public)
+ oniggnu.h GNU regex API header file. (public)
+ onigcmpt200.h Oniguruma API backward compatibility header file. (public)
+ (for version 2.0.0 or older)
+
+ regenc.h character encodings framework header file.
+ regint.h internal definitions
+ regparse.h internal definitions for regparse.c and regcomp.c
+ regcomp.c compiling and optimization functions
+ regenc.c character encodings framework.
+ regerror.c error message function
+ regex.c source files wrapper for Ruby
+ regexec.c search and match functions
+ regparse.c parsing functions.
+ reggnu.c GNU regex API functions
+
+ onigposix.h POSIX API header file. (public)
+ regposerr.c POSIX error message function.
+ regposix.c POSIX functions.
+
+ enc/mktable.c character type table generator.
+ enc/ascii.c ASCII encoding.
+ enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
+ enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
+ enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
+ enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
+ enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
+ enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
+ enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
+ enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
+ enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
+ enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
+ enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
+ enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
+ enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
+ enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
+ enc/iso8859_16.c ISO-8859-16 encoding.
+ (Latin-10 or South-Eastern European with Euro)
+ enc/utf8.c UTF-8 encoding.
+ enc/euc_jp.c EUC-JP encoding.
+ enc/euc_tw.c EUC-TW encoding.
+ enc/euc_kr.c EUC-KR, EUC-CN encoding.
+ enc/sjis.c Shift_JIS encoding.
+ enc/koi8.c KOI8 encoding.
+ enc/koi8_r.c KOI8-R encoding.
+ enc/big5.c Big5 encoding.
- See doc/RE.
API differences with Japanized GNU regex(version 0.12) of Ruby
@@ -93,22 +148,18 @@ API differences with Japanized GNU regex(version 0.12) of Ruby
ToDo
- 1 support 16-bit and 31-bit encodings. (UCS-2, UCS-4, UTF-16)
- (each encoding has meta-character code table?)
-
- 2 if-then-else. (?(condition)then), (?(condition)then|else)
+ 1 support 16-bit encodings. (UTF-16)
+ 2 support a pattern whose encoding differs from the target's.
+ (ex. ASCII/UTF-16, UTF-16 BE and UTF-16 LE)
+ 3 add enc/name.c (onigenc_get_enc_by_name(name))
- ? variable meta characters.
- ? implement syntax behavior REG_SYN_CONTEXT_INDEP_ANCHORS.
- ? pattern encoding different with target.
- (ex. UCS-2 Big Endian and UCS-2 Little Endian)
- ? better acess to hash table.
+ ? transmission stopper. (return ONIG_STOP from match_at())
+ ? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
+ ? better access to hash table (st.c).
non null-terminated key version st_lookup().
- (but it needs to modify st.[ch])
- ? character set specific POSIX bracket extensions. ([:hiragana:])
- ? grep-like tool 'onigrep'. (variable syntax option etc..)
- ? check invalid wide char value in WC2MB, WC2MB_FIRST on Ruby M17N.
- ? define THREAD_PASS in regint.h as rb_thread_pass().
-
+ ? grep-like tool 'onigrep'.
+ ? return parse tree of regexp pattern to application.
+ ?? /a{n}?/ should be interpreted as /(?:a{n})?/.
+ ?? \h hexadecimal digit char ([0-9a-fA-F]), \H not \h.
and I'm thankful to Akinori MUSHA.
diff --git a/ext/mbstring/oniguruma/configure b/ext/mbstring/oniguruma/configure
new file mode 100755
index 0000000000..a63327d854
--- /dev/null
+++ b/ext/mbstring/oniguruma/configure
@@ -0,0 +1,5414 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.53.
+#
+# Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+
+# NLS nuisances.
+# Support unset when possible.
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+(set +x; test -n "`(LANG=C; export LANG) 2>&1`") &&
+ { $as_unset LANG || test "${LANG+set}" != set; } ||
+ { LANG=C; export LANG; }
+(set +x; test -n "`(LC_ALL=C; export LC_ALL) 2>&1`") &&
+ { $as_unset LC_ALL || test "${LC_ALL+set}" != set; } ||
+ { LC_ALL=C; export LC_ALL; }
+(set +x; test -n "`(LC_TIME=C; export LC_TIME) 2>&1`") &&
+ { $as_unset LC_TIME || test "${LC_TIME+set}" != set; } ||
+ { LC_TIME=C; export LC_TIME; }
+(set +x; test -n "`(LC_CTYPE=C; export LC_CTYPE) 2>&1`") &&
+ { $as_unset LC_CTYPE || test "${LC_CTYPE+set}" != set; } ||
+ { LC_CTYPE=C; export LC_CTYPE; }
+(set +x; test -n "`(LANGUAGE=C; export LANGUAGE) 2>&1`") &&
+ { $as_unset LANGUAGE || test "${LANGUAGE+set}" != set; } ||
+ { LANGUAGE=C; export LANGUAGE; }
+(set +x; test -n "`(LC_COLLATE=C; export LC_COLLATE) 2>&1`") &&
+ { $as_unset LC_COLLATE || test "${LC_COLLATE+set}" != set; } ||
+ { LC_COLLATE=C; export LC_COLLATE; }
+(set +x; test -n "`(LC_NUMERIC=C; export LC_NUMERIC) 2>&1`") &&
+ { $as_unset LC_NUMERIC || test "${LC_NUMERIC+set}" != set; } ||
+ { LC_NUMERIC=C; export LC_NUMERIC; }
+(set +x; test -n "`(LC_MESSAGES=C; export LC_MESSAGES) 2>&1`") &&
+ { $as_unset LC_MESSAGES || test "${LC_MESSAGES+set}" != set; } ||
+ { LC_MESSAGES=C; export LC_MESSAGES; }
+
+
+# Name of the executable.
+as_me=`(basename "$0") 2>/dev/null ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conftest.sh
+ echo "exit 0" >>conftest.sh
+ chmod +x conftest.sh
+ if (PATH=".;."; conftest.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conftest.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH || test "${CDPATH+set}" != set || { CDPATH=$PATH_SEPARATOR; export CDPATH; }
+
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+exec 6>&1
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Maximum number of lines to put in a shell here document.
+# This variable seems obsolete. It should probably be removed, and
+# only ac_max_sed_lines should be used.
+: ${ac_max_here_lines=38}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_unique_file="regex.c"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_option in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ eval "enable_$ac_feature=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_$ac_feature='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_$ac_package='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/-/_/g'`
+ eval "with_$ac_package=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ { echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute paths.
+for ac_var in exec_prefix prefix
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# Be sure to have absolute paths.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+ localstatedir libdir includedir oldincludedir infodir mandir
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+ { (exit 1); exit 1; }; }
+ else
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
+ fi
+fi
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+ac_env_CC_set=${CC+set}
+ac_env_CC_value=$CC
+ac_cv_env_CC_set=${CC+set}
+ac_cv_env_CC_value=$CC
+ac_env_CFLAGS_set=${CFLAGS+set}
+ac_env_CFLAGS_value=$CFLAGS
+ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ac_cv_env_CFLAGS_value=$CFLAGS
+ac_env_LDFLAGS_set=${LDFLAGS+set}
+ac_env_LDFLAGS_value=$LDFLAGS
+ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ac_cv_env_LDFLAGS_value=$LDFLAGS
+ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_env_CPPFLAGS_value=$CPPFLAGS
+ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ac_env_CPP_set=${CPP+set}
+ac_env_CPP_value=$CPP
+ac_cv_env_CPP_set=${CPP+set}
+ac_cv_env_CPP_value=$CPP
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+ cat <<_ACEOF
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+_ACEOF
+
+ cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)
+ --with-statistics take matching time statistical data
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
+ headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+fi
+
+# When $ac_init_help is "recursive", visit every directory listed in
+# $ac_subdirs_all and print the help of the configure script found there.
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ # ac_popdir remembers the starting directory so each iteration can return.
+ ac_popdir=`pwd`
+ # The leading `:' keeps the word list non-empty; it is skipped right away.
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d $ac_dir || continue
+ ac_builddir=.
+
+# Derive ac_dir_suffix ($ac_dir with a leading "./" removed, prefixed by "/")
+# and ac_top_builddir (the relative way back up from $ac_dir).
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+# Work out ac_srcdir (sources matching $ac_dir) and ac_top_srcdir (top of
+# the source tree), depending on how $srcdir was given.
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
+# absolute.
+# Each absolute name is obtained by actually changing directory in a
+# command substitution and asking `pwd'.
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
+ac_abs_top_builddir=`cd "$ac_dir" && cd $ac_top_builddir && pwd`
+ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
+ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
+
+ cd $ac_dir
+ # Check for guested configure; otherwise get Cygnus style configure.
+ # Order of preference: configure.gnu, then configure, then (when only
+ # configure.ac/configure.in exists) $ac_configure, which is set outside
+ # this section. A directory with none of these only produces a warning.
+ if test -f $ac_srcdir/configure.gnu; then
+ echo
+ $SHELL $ac_srcdir/configure.gnu --help=recursive
+ elif test -f $ac_srcdir/configure; then
+ echo
+ $SHELL $ac_srcdir/configure --help=recursive
+ elif test -f $ac_srcdir/configure.ac ||
+ test -f $ac_srcdir/configure.in; then
+ echo
+ $ac_configure --help
+ else
+ echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi
+ # Go back to the directory saved in ac_popdir before the next iteration.
+ cd $ac_popdir
+ done
+fi
+
+# All help output has been printed by this point; stop before doing any
+# real configuration work.
+test -n "$ac_init_help" && exit 0
+# The value of $ac_init_version is run as a command (presumably `:' or
+# `false' -- confirm where it is assigned); on success the copyright notice
+# is printed verbatim (quoted heredoc delimiter) and the script exits.
+if $ac_init_version; then
+ cat <<\_ACEOF
+
+Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
+Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit 0
+fi
+# Open (and truncate) config.log on file descriptor 5; every later `>&5'
+# in this script appends to that log.
+exec 5>config.log
+# The heredoc delimiter is unquoted, so $as_me, $0 and $@ are expanded and
+# the actual invocation command line is recorded in the log header.
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.53. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Also quote any args containing shell meta-characters.
+# The result, ac_configure_args, is a space-separated list of single-quoted
+# arguments; ac_sep is empty for the first entry and " " afterwards.
+ac_configure_args=
+ac_sep=
+# `for ac_arg' without an `in' list iterates over the positional parameters.
+for ac_arg
+do
+ # First case: skip the two options (and their abbreviations); for an
+ # argument containing whitespace or shell meta-characters, rewrite each
+ # single quote as '\'' so the argument survives being wrapped in '...'.
+ case $ac_arg in
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n ) continue ;;
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ continue ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ # Second case: append the quoted argument unless it is already recorded.
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+ ac_sep=" " ;;
+ esac
+ # No leading space has to be stripped: ac_sep is empty for the first entry.
+done
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+{
+ (set) 2>&1 |
+ case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+ ;;
+ *)
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+}
+ echo
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ sed "/^$/d" confdefs.h
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core core.* *.core &&
+ rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+ ' 0
+# For signals 1, 2, 13 and 15 (conventionally HUP, INT, PIPE and TERM),
+# install a handler that stores the signal number in ac_signal and exits
+# with status 1. The exit then runs the trap installed on 0 above, which
+# logs "caught signal N" when ac_signal is not 0.
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+# Reset the loop variable: 0 means "no signal caught" to the exit trap.
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+# If CONFIG_SITE is unset or empty, default it to share/config.site and
+# etc/config.site under $prefix, or under $ac_default_prefix when no prefix
+# was given (prefix is still NONE).
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+# $CONFIG_SITE is deliberately unquoted so it splits into several file names.
+# Each readable file is announced (log on fd 5, message on fd 6), copied into
+# config.log with a "| " prefix, and then sourced into this shell.
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file"
+ fi
+done
+
+# Load the results of a previous run from $cache_file when it is readable;
+# otherwise create it as an empty file.
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special
+ # files actually), so we avoid doing that.
+ if test -f "$cache_file"; then
+ { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+ # Absolute names are sourced as given; anything else gets a "./" prefix
+ # so that `.' does not look the file up along $PATH.
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . $cache_file;;
+ *) . ./$cache_file;;
+ esac
+ fi
+else
+ { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+ # A bare redirection creates (or truncates) the file without writing data.
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+# The variable names are taken from every shell variable of the form
+# ac_env_NAME_set; for each NAME the cached state (ac_cv_env_NAME_set/_value)
+# is compared with the current one (ac_env_NAME_set/_value).
+ac_cache_corrupted=false
+for ac_var in `(set) 2>&1 |
+ sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val="\$ac_cv_env_${ac_var}_value"
+ eval ac_new_val="\$ac_env_${ac_var}_value"
+ # Cases on "old,new": set only before, set only now, set in neither run,
+ # or set in both (then the values themselves must match).
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
+echo "$as_me: former value: $ac_old_val" >&2;}
+ { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
+echo "$as_me: current value: $ac_new_val" >&2;}
+ ac_cache_corrupted=:
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ # A currently-set variable is appended to ac_configure_args as a quoted
+ # NAME=value word, escaping single quotes when the value contains
+ # whitespace or shell meta-characters, and skipping exact duplicates.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ esac
+ fi
+done
+# ac_cache_corrupted holds a command name (`false' or `:'); any mismatch
+# found above turns it into `:' and the script aborts with status 1.
+if $ac_cache_corrupted; then
+ { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ac_config_headers="$ac_config_headers config.h"
+
+
+# RUBYDIR defaults to the parent directory (`..').
+RUBYDIR=".."
+
+# Check whether --with-rubydir or --without-rubydir was given.
+# A bare --with-rubydir yields withval=yes and --without-rubydir yields
+# withval=no; neither names a directory, so the default is kept for them.
+# Any other value is taken as the directory.
+# NOTE: this file is generated; the same guard belongs in the AC_ARG_WITH
+# call in configure.in so that it survives regeneration.
+if test "${with_rubydir+set}" = set; then
+ withval="$with_rubydir"
+ case $withval in
+ yes | no) ;;
+ *) RUBYDIR=$withval ;;
+ esac
+fi;
+
+
+# STATISTICS is empty unless statistics are requested.
+STATISTICS=""
+
+# Check whether --with-statistics or --without-statistics was given.
+# --without-statistics is the same as --with-statistics=no and also sets
+# with_statistics, so the value (not just its presence) must be tested;
+# otherwise asking to disable statistics would enable them.
+# NOTE: this file is generated; the same guard belongs in the AC_ARG_WITH
+# call in configure.in so that it survives regeneration.
+if test "${with_statistics+set}" = set; then
+ withval="$with_statistics"
+ if test "x$withval" != xno; then
+ STATISTICS=-DREG_DEBUG_STATISTICS
+ fi
+fi;
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+fi
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ set dummy "$as_dir/$ac_word" ${1+"$@"}
+ shift
+ ac_cv_prog_CC="$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$ac_ct_CC" && break
+done
+
+ CC=$ac_ct_CC
+fi
+
+fi
+
+
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH" >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH" >&2;}
+ { (exit 1); exit 1; }; }
+
+# Provide some information about the compiler.
+echo "$as_me:$LINENO:" \
+ "checking for C compiler version" >&5
+# ac_compiler is the first word of $ac_compile. That string was assigned in
+# single quotes, so the word is the literal text `$CC'; it is only expanded
+# by the `eval' calls below.
+ac_compiler=`set X $ac_compile; echo $2`
+# Try --version, -v and -V in turn. The command, its output and its exit
+# status all go to config.log (fd 5); nothing tests the result, so a
+# compiler that rejects an option does not abort configure.
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
+ (eval $ac_compiler --version </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
+ (eval $ac_compiler -v </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
+ (eval $ac_compiler -V </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and gives a first guess
+# at exeext.
+echo "$as_me:$LINENO: checking for C compiler default output" >&5
+echo $ECHO_N "checking for C compiler default output... $ECHO_C" >&6
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
+ (eval $ac_link_default) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Find the output, starting from the most likely. This scheme is
+# not robust to junk in `.', hence go to wildcards (a.*) only as a last
+# resort.
+
+# Be careful to initialize this variable, since it used to be cached.
+# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
+ac_cv_exeext=
+for ac_file in `ls a_out.exe a.exe conftest.exe 2>/dev/null;
+ ls a.out conftest 2>/dev/null;
+ ls a.* conftest.* 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.o | *.obj | *.xcoff | *.tds | *.d | *.pdb | *.xSYM ) ;;
+ a.out ) # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ # FIXME: I believe we export ac_cv_exeext for Libtool --akim.
+ export ac_cv_exeext
+ break;;
+ * ) break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables" >&5
+echo "$as_me: error: C compiler cannot create executables" >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6
+
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+rm -f a.out a.exe conftest$ac_cv_exeext
+ac_clean_files=$ac_clean_files_save
+# Report whether we are cross compiling; the value of $cross_compiling was
+# settled by the run test above (or was already `yes').
+echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
+echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6
+
+echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in `(ls conftest.exe; ls conftest; ls conftest.*) 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.o | *.obj | *.xcoff | *.tds | *.d | *.pdb ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ export ac_cv_exeext
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link" >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile" >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_compiler_gnu=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+CFLAGS="-g"
+echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_prog_cc_g=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
+# Choose the final CFLAGS. ac_test_CFLAGS is "set" when CFLAGS already
+# existed before the -g probe, in which case the saved value is restored
+# untouched. Otherwise the default is built from two facts: whether the
+# compiler accepts -g (ac_cv_prog_cc_g) and whether it is GCC (adds -O2).
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+# Some people use a C++ compiler to compile C. Since we use `exit',
+# in C++ we need to declare it. In case someone uses the same compiler
+# for both compiling C and C++ we need to have the C++ compiler decide
+# the declaration of exit, since it's the most demanding environment.
+cat >conftest.$ac_ext <<_ACEOF
+#ifndef __cplusplus
+ choke me
+#endif
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ for ac_declaration in \
+ ''\
+ '#include <stdlib.h>' \
+ 'extern "C" void std::exit (int) throw (); using std::exit;' \
+ 'extern "C" void std::exit (int); using std::exit;' \
+ 'extern "C" void exit (int) throw ();' \
+ 'extern "C" void exit (int);' \
+ 'void exit (int);'
+do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+$ac_declaration
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+continue
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_declaration
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+done
+rm -f conftest*
+if test -n "$ac_declaration"; then
+ echo '#ifdef __cplusplus' >>confdefs.h
+ echo $ac_declaration >>confdefs.h
+ echo '#endif' >>confdefs.h
+fi
+
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test -n "$ac_tool_prefix"; then # Locate ranlib, trying the name qualified with ac_tool_prefix first.
+ # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_RANLIB+set}" = set; then # A value already present in the cache variable is reused as is.
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR # Split PATH on PATH_SEPARATOR for the directory walk.
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=. # An empty PATH entry stands for the current directory.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2 # First hit wins: leave both the extension loop and the PATH loop.
+ fi
+done
+done
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+ echo "$as_me:$LINENO: result: $RANLIB" >&5
+echo "${ECHO_T}$RANLIB" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then # No prefixed tool (or no prefix at all): fall back to plain ranlib.
+ ac_ct_RANLIB=$RANLIB
+ # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_RANLIB"; then
+ ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_RANLIB="ranlib"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+ test -z "$ac_cv_prog_ac_ct_RANLIB" && ac_cv_prog_ac_ct_RANLIB=":" # Nothing found: substitute the no-op command.
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+ echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5
+echo "${ECHO_T}$ac_ct_RANLIB" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ RANLIB=$ac_ct_RANLIB
+else
+ RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+
+
+
+ac_ext=c # Select C for the probes that follow, then locate and sanity-check the C preprocessor.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then # No usable preset: search the candidates below, or reuse the cached choice.
+ if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes # Pass 1 tolerates stderr output from the preprocessor; pass 2 treats it as failure.
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <assert.h>
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err # Drop captured stderr lines that begin with optional spaces and a plus sign.
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag # Leftover stderr output counts as an error only when the warn flag is yes.
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP # Cache the candidate the loop ended on (the last one tried if none passed).
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP # A preset CPP is taken as given and recorded in the cache variable.
+fi
+echo "$as_me:$LINENO: result: $CPP" >&5
+echo "${ECHO_T}$CPP" >&6
+ac_preproc_ok=false # Re-run both probes against the final CPP, whichever way it was obtained.
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <assert.h>
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
+else
+ { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check" >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+echo "$as_me:$LINENO: checking for ANSI C header files" >&5 # Sets ac_cv_header_stdc; any failing probe below turns it into no.
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_cv_header_stdc=yes # The four headers preprocessed cleanly; the remaining probes can still veto this.
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ ac_cv_header_stdc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then # Each later probe runs only while the answer is still yes.
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then # Passes when the preprocessed string.h output mentions memchr.
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then # Passes when the preprocessed stdlib.h output mentions free.
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then # The run-time ctype probe is skipped when cross compiling.
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+ac_cv_header_stdc=no # The ctype program failed to link or exited non-zero.
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6
+if test $ac_cv_header_stdc = yes; then # Publish a positive result to confdefs.h as STDC_HEADERS.
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types.h and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do # One compile probe per listed header; each result is cached and reported.
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` # Name of the cache variable for this header.
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes" # Compiled and produced a non-empty object file.
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+eval "$as_ac_Header=no"
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+if test `eval echo '${'$as_ac_Header'}'` = yes; then # On success append a HAVE_ define, named via as_tr_cpp, to confdefs.h.
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+
+
+for ac_header in stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h # Per header: reuse a cached answer, else run a compile probe and a preprocess probe.
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then # Cached: only report the stored result.
+ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+ # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_header_compiler=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ egrep -v '^ *\+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc in # Only the two disagreeing combinations produce warnings.
+ yes:no )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;};;
+ no:yes )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;};;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=$ac_header_preproc" # The preprocessor verdict is the one that gets recorded.
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then # On success append a HAVE_ define, named via as_tr_cpp, to confdefs.h.
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+echo "$as_me:$LINENO: checking for int" >&5 # Compile probe for the type int; the answer is stored in ac_cv_type_int.
+echo $ECHO_N "checking for int... $ECHO_C" >&6
+if test "${ac_cv_type_int+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+if ((int *) 0)
+ return 0;
+if (sizeof (int))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_int=yes # The probe compiled and left a non-empty object file.
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_type_int=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_type_int" >&5
+echo "${ECHO_T}$ac_cv_type_int" >&6
+
+echo "$as_me:$LINENO: checking size of int" >&5 # Computes ac_cv_sizeof_int and writes it to confdefs.h as SIZEOF_INT.
+echo $ECHO_N "checking size of int... $ECHO_C" >&6
+if test "${ac_cv_sizeof_int+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$ac_cv_type_int" = yes; then
+ # The cast to long works around a bug in the HP C Compiler
+ # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+ # declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+ # This bug is HP SR number 8606223364.
+ if test "$cross_compiling" = yes; then
+ # Depending upon the size, compute the lo and hi bounds.
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (int))) >= 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=0 ac_mid=0 # Size is at least 0: raise ac_mid (0, 1, 3, 7, ...) until the probe for size <= ac_mid compiles.
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (int))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr $ac_mid + 1`
+ if test $ac_lo -le $ac_mid; then # Give up if adding 1 did not produce a larger number.
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (int))) < 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=-1 ac_mid=-1 # Value is negative: lower ac_mid (-1, -2, -4, ...) until the probe for value >= ac_mid compiles.
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (int))) >= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_hi=`expr '(' $ac_mid ')' - 1`
+ if test $ac_mid -le $ac_hi; then # Give up if subtracting 1 did not produce a smaller number.
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo= ac_hi= # Neither sign probe compiled: leave both bounds empty so the case below reports the error.
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+# Binary search between lo and hi bounds.
+while test "x$ac_lo" != "x$ac_hi"; do
+ ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (int))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr '(' $ac_mid ')' + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+done
+case $ac_lo in
+?*) ac_cv_sizeof_int=$ac_lo;;
+'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (int), 77" >&5
+echo "$as_me: error: cannot compute sizeof (int), 77" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+else
+ if test "$cross_compiling" = yes; then # Sits in the else branch of the identical test above, so the error path below is not expected to run.
+ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5
+echo "$as_me: error: cannot run test program while cross compiling" >&2;}
+ { (exit 1); exit 1; }; }
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+long longval () { return (long) (sizeof (int)); }
+unsigned long ulongval () { return (long) (sizeof (int)); }
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ FILE *f = fopen ("conftest.val", "w");
+ if (! f)
+ exit (1);
+ if (((long) (sizeof (int))) < 0)
+ {
+ long i = longval ();
+ if (i != ((long) (sizeof (int))))
+ exit (1);
+ fprintf (f, "%ld\n", i);
+ }
+ else
+ {
+ unsigned long i = ulongval ();
+ if (i != ((long) (sizeof (int))))
+ exit (1);
+ fprintf (f, "%lu\n", i);
+ }
+ exit (ferror (f) || fclose (f) != 0);
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_sizeof_int=`cat conftest.val` # The test program wrote the size into conftest.val.
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+{ { echo "$as_me:$LINENO: error: cannot compute sizeof (int), 77" >&5
+echo "$as_me: error: cannot compute sizeof (int), 77" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+rm -f conftest.val
+else
+ ac_cv_sizeof_int=0 # The type probe answered no: record a size of 0.
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_sizeof_int" >&5
+echo "${ECHO_T}$ac_cv_sizeof_int" >&6
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_INT $ac_cv_sizeof_int
+_ACEOF
+
+
+echo "$as_me:$LINENO: checking for short" >&5 # Compile probe for the type short; the answer is stored in ac_cv_type_short.
+echo $ECHO_N "checking for short... $ECHO_C" >&6
+if test "${ac_cv_type_short+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+if ((short *) 0)
+ return 0;
+if (sizeof (short))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_short=yes # The probe compiled and left a non-empty object file.
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_type_short=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_type_short" >&5
+echo "${ECHO_T}$ac_cv_type_short" >&6
+
+echo "$as_me:$LINENO: checking size of short" >&5
+echo $ECHO_N "checking size of short... $ECHO_C" >&6
+if test "${ac_cv_sizeof_short+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$ac_cv_type_short" = yes; then
+ # The cast to long works around a bug in the HP C Compiler
+ # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+ # declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+ # This bug is HP SR number 8606223364.
+ if test "$cross_compiling" = yes; then
+ # Depending upon the size, compute the lo and hi bounds.
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (short))) >= 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=0 ac_mid=0
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (short))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr $ac_mid + 1`
+ if test $ac_lo -le $ac_mid; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (short))) < 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=-1 ac_mid=-1
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (short))) >= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_hi=`expr '(' $ac_mid ')' - 1`
+ if test $ac_mid -le $ac_hi; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo= ac_hi=
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+# Binary search between lo and hi bounds.
+while test "x$ac_lo" != "x$ac_hi"; do
+ ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (short))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr '(' $ac_mid ')' + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+done
+case $ac_lo in
+?*) ac_cv_sizeof_short=$ac_lo;;
+'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (short), 77" >&5
+echo "$as_me: error: cannot compute sizeof (short), 77" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+else
+ if test "$cross_compiling" = yes; then
+ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5
+echo "$as_me: error: cannot run test program while cross compiling" >&2;}
+ { (exit 1); exit 1; }; }
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+long longval () { return (long) (sizeof (short)); }
+unsigned long ulongval () { return (long) (sizeof (short)); }
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ FILE *f = fopen ("conftest.val", "w");
+ if (! f)
+ exit (1);
+ if (((long) (sizeof (short))) < 0)
+ {
+ long i = longval ();
+ if (i != ((long) (sizeof (short))))
+ exit (1);
+ fprintf (f, "%ld\n", i);
+ }
+ else
+ {
+ unsigned long i = ulongval ();
+ if (i != ((long) (sizeof (short))))
+ exit (1);
+ fprintf (f, "%lu\n", i);
+ }
+ exit (ferror (f) || fclose (f) != 0);
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_sizeof_short=`cat conftest.val`
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+{ { echo "$as_me:$LINENO: error: cannot compute sizeof (short), 77" >&5
+echo "$as_me: error: cannot compute sizeof (short), 77" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+rm -f conftest.val
+else
+ ac_cv_sizeof_short=0
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_sizeof_short" >&5
+echo "${ECHO_T}$ac_cv_sizeof_short" >&6
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_SHORT $ac_cv_sizeof_short
+_ACEOF
+
+
+echo "$as_me:$LINENO: checking for long" >&5
+echo $ECHO_N "checking for long... $ECHO_C" >&6
+if test "${ac_cv_type_long+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+if ((long *) 0)
+ return 0;
+if (sizeof (long))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_long=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_type_long=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_type_long" >&5
+echo "${ECHO_T}$ac_cv_type_long" >&6
+
+echo "$as_me:$LINENO: checking size of long" >&5
+echo $ECHO_N "checking size of long... $ECHO_C" >&6
+if test "${ac_cv_sizeof_long+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$ac_cv_type_long" = yes; then
+ # The cast to long works around a bug in the HP C Compiler
+ # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+ # declarations like `int a3[(sizeof (unsigned char)) >= 0];'.
+ # This bug is HP SR number 8606223364.
+ if test "$cross_compiling" = yes; then
+ # Depending upon the size, compute the lo and hi bounds.
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (long))) >= 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=0 ac_mid=0
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (long))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr $ac_mid + 1`
+ if test $ac_lo -le $ac_mid; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (long))) < 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=-1 ac_mid=-1
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (long))) >= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_lo=$ac_mid; break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_hi=`expr '(' $ac_mid ')' - 1`
+ if test $ac_mid -le $ac_hi; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo= ac_hi=
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+# Binary search between lo and hi bounds.
+while test "x$ac_lo" != "x$ac_hi"; do
+ ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long) (sizeof (long))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_hi=$ac_mid
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_lo=`expr '(' $ac_mid ')' + 1`
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+done
+case $ac_lo in
+?*) ac_cv_sizeof_long=$ac_lo;;
+'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (long), 77" >&5
+echo "$as_me: error: cannot compute sizeof (long), 77" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+else
+ if test "$cross_compiling" = yes; then
+ { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling" >&5
+echo "$as_me: error: cannot run test program while cross compiling" >&2;}
+ { (exit 1); exit 1; }; }
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+$ac_includes_default
+long longval () { return (long) (sizeof (long)); }
+unsigned long ulongval () { return (long) (sizeof (long)); }
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ FILE *f = fopen ("conftest.val", "w");
+ if (! f)
+ exit (1);
+ if (((long) (sizeof (long))) < 0)
+ {
+ long i = longval ();
+ if (i != ((long) (sizeof (long))))
+ exit (1);
+ fprintf (f, "%ld\n", i);
+ }
+ else
+ {
+ unsigned long i = ulongval ();
+ if (i != ((long) (sizeof (long))))
+ exit (1);
+ fprintf (f, "%lu\n", i);
+ }
+ exit (ferror (f) || fclose (f) != 0);
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_sizeof_long=`cat conftest.val`
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+{ { echo "$as_me:$LINENO: error: cannot compute sizeof (long), 77" >&5
+echo "$as_me: error: cannot compute sizeof (long), 77" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+rm -f conftest.val
+else
+ ac_cv_sizeof_long=0
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_sizeof_long" >&5
+echo "${ECHO_T}$ac_cv_sizeof_long" >&6
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_LONG $ac_cv_sizeof_long
+_ACEOF
+
+
+echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
+echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_stdc=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_stdc=$ac_arg
+break
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+fi
+rm -f conftest.$ac_objext
+done
+rm -f conftest.$ac_ext conftest.$ac_objext
+CC=$ac_save_CC
+
+fi
+
+case "x$ac_cv_prog_cc_stdc" in
+ x|xno)
+ echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6 ;;
+ *)
+ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
+ CC="$CC $ac_cv_prog_cc_stdc" ;;
+esac
+
+echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5
+echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6
+if test "${ac_cv_c_const+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+/* FIXME: Include the comments suggested by Paul. */
+#ifndef __cplusplus
+ /* Ultrix mips cc rejects this. */
+ typedef int charset[2];
+ const charset x;
+ /* SunOS 4.1.1 cc rejects this. */
+ char const *const *ccp;
+ char **p;
+ /* NEC SVR4.0.2 mips cc rejects this. */
+ struct point {int x, y;};
+ static struct point const zero = {0,0};
+ /* AIX XL C 1.02.0.0 rejects this.
+ It does not let you subtract one const X* pointer from another in
+ an arm of an if-expression whose if-part is not a constant
+ expression */
+ const char *g = "string";
+ ccp = &g + (g ? g-g : 0);
+ /* HPUX 7.0 cc rejects these. */
+ ++ccp;
+ p = (char**) ccp;
+ ccp = (char const *const *) p;
+ { /* SCO 3.2v4 cc rejects this. */
+ char *t;
+ char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+ *t++ = 0;
+ }
+ { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */
+ int x[] = {25, 17};
+ const int *foo = &x[0];
+ ++foo;
+ }
+ { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+ typedef const int *iptr;
+ iptr p = 0;
+ ++p;
+ }
+ { /* AIX XL C 1.02.0.0 rejects this saying
+ "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+ struct s { int j; const int *ap[3]; };
+ struct s *b; b->j = 5;
+ }
+ { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+ const int foo = 10;
+ }
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_c_const=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_c_const=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5
+echo "${ECHO_T}$ac_cv_c_const" >&6
+if test $ac_cv_c_const = no; then
+
+cat >>confdefs.h <<\_ACEOF
+#define const
+_ACEOF
+
+fi
+
+echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5
+echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6
+if test "${ac_cv_header_time+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+if ((struct tm *) 0)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_time=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_header_time=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_header_time" >&5
+echo "${ECHO_T}$ac_cv_header_time" >&6
+if test $ac_cv_header_time = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TIME_WITH_SYS_TIME 1
+_ACEOF
+
+fi
+
+
+# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
+# for constant arguments. Useless!
+echo "$as_me:$LINENO: checking for working alloca.h" >&5
+echo $ECHO_N "checking for working alloca.h... $ECHO_C" >&6
+if test "${ac_cv_working_alloca_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#include <alloca.h>
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+char *p = (char *) alloca (2 * sizeof (int));
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_working_alloca_h=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_working_alloca_h=no
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_working_alloca_h" >&5
+echo "${ECHO_T}$ac_cv_working_alloca_h" >&6
+if test $ac_cv_working_alloca_h = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ALLOCA_H 1
+_ACEOF
+
+fi
+
+echo "$as_me:$LINENO: checking for alloca" >&5
+echo $ECHO_N "checking for alloca... $ECHO_C" >&6
+if test "${ac_cv_func_alloca_works+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#ifdef __GNUC__
+# define alloca __builtin_alloca
+#else
+# ifdef _MSC_VER
+# include <malloc.h>
+# define alloca _alloca
+# else
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+char *p = (char *) alloca (1);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_alloca_works=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+ac_cv_func_alloca_works=no
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_func_alloca_works" >&5
+echo "${ECHO_T}$ac_cv_func_alloca_works" >&6
+
+if test $ac_cv_func_alloca_works = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ALLOCA 1
+_ACEOF
+
+else
+ # The SVR3 libPW and SVR4 libucb both contain incompatible functions
+# that cause trouble. Some versions do not even contain alloca or
+# contain a buggy version. If you still want to use their alloca,
+# use ar to extract alloca.o from them instead of compiling alloca.c.
+
+ALLOCA=alloca.$ac_objext
+
+cat >>confdefs.h <<\_ACEOF
+#define C_ALLOCA 1
+_ACEOF
+
+
+echo "$as_me:$LINENO: checking whether \`alloca.c' needs Cray hooks" >&5
+echo $ECHO_N "checking whether \`alloca.c' needs Cray hooks... $ECHO_C" >&6
+if test "${ac_cv_os_cray+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+#if defined(CRAY) && ! defined(CRAY2)
+webecray
+#else
+wenotbecray
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "webecray" >/dev/null 2>&1; then
+ ac_cv_os_cray=yes
+else
+ ac_cv_os_cray=no
+fi
+rm -f conftest*
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_os_cray" >&5
+echo "${ECHO_T}$ac_cv_os_cray" >&6
+if test $ac_cv_os_cray = yes; then
+ for ac_func in _getb67 GETB67 getb67; do
+ as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
+if eval "test \"\${$as_ac_var+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+#ifdef __cplusplus
+extern "C"
+#endif
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+char $ac_func ();
+char (*f) ();
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+f = $ac_func;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+eval "$as_ac_var=no"
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define CRAY_STACKSEG_END $ac_func
+_ACEOF
+
+ break
+fi
+
+ done
+fi
+
+echo "$as_me:$LINENO: checking stack direction for C alloca" >&5
+echo $ECHO_N "checking stack direction for C alloca... $ECHO_C" >&6
+if test "${ac_cv_c_stack_direction+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_c_stack_direction=0
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+int
+find_stack_direction ()
+{
+ static char *addr = 0;
+ auto char dummy;
+ if (addr == 0)
+ {
+ addr = &dummy;
+ return find_stack_direction ();
+ }
+ else
+ return (&dummy > addr) ? 1 : -1;
+}
+
+int
+main ()
+{
+ exit (find_stack_direction () < 0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_c_stack_direction=1
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+ac_cv_c_stack_direction=-1
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_stack_direction" >&5
+echo "${ECHO_T}$ac_cv_c_stack_direction" >&6
+
+cat >>confdefs.h <<_ACEOF
+#define STACK_DIRECTION $ac_cv_c_stack_direction
+_ACEOF
+
+
+fi
+
+echo "$as_me:$LINENO: checking for working memcmp" >&5
+echo $ECHO_N "checking for working memcmp... $ECHO_C" >&6
+if test "${ac_cv_func_memcmp_working+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_func_memcmp_working=no
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+
+ /* Some versions of memcmp are not 8-bit clean. */
+ char c0 = 0x40, c1 = 0x80, c2 = 0x81;
+ if (memcmp(&c0, &c2, 1) >= 0 || memcmp(&c1, &c2, 1) >= 0)
+ exit (1);
+
+ /* The Next x86 OpenStep bug shows up only when comparing 16 bytes
+ or more and with at least one buffer not starting on a 4-byte boundary.
+ William Lewis provided this test program. */
+ {
+ char foo[21];
+ char bar[21];
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ char *a = foo + i;
+ char *b = bar + i;
+ strcpy (a, "--------01111111");
+ strcpy (b, "--------10000000");
+ if (memcmp (a, b, 16) >= 0)
+ exit (1);
+ }
+ exit (0);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_memcmp_working=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+( exit $ac_status )
+ac_cv_func_memcmp_working=no
+fi
+rm -f core core.* *.core conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_func_memcmp_working" >&5
+echo "${ECHO_T}$ac_cv_func_memcmp_working" >&6
+test $ac_cv_func_memcmp_working = no && LIBOBJS="$LIBOBJS memcmp.$ac_objext"
+
+
+echo "$as_me:$LINENO: checking for prototypes" >&5
+echo $ECHO_N "checking for prototypes... $ECHO_C" >&6
+if test "${cv_have_prototypes+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+int foo(int x) { return 0; }
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+return foo(10);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cv_have_prototypes=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+cv_have_prototypes=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $cv_have_prototypes" >&5
+echo "${ECHO_T}$cv_have_prototypes" >&6
+if test "$cv_have_prototypes" = yes; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_PROTOTYPES 1
+_ACEOF
+
+fi
+
+echo "$as_me:$LINENO: checking for variable length prototypes and stdarg.h" >&5
+echo $ECHO_N "checking for variable length prototypes and stdarg.h... $ECHO_C" >&6
+if test "${cv_stdarg+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#include <stdarg.h>
+int foo(int x, ...) {
+ va_list va;
+ va_start(va, x);
+ va_arg(va, int);
+ va_arg(va, char *);
+ va_arg(va, double);
+ return 0;
+}
+
+#ifdef F77_DUMMY_MAIN
+# ifdef __cplusplus
+ extern "C"
+# endif
+ int F77_DUMMY_MAIN() { return 1; }
+#endif
+int
+main ()
+{
+return foo(10, "", 3.14);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cv_stdarg=yes
+else
+ echo "$as_me: failed program was:" >&5
+cat conftest.$ac_ext >&5
+cv_stdarg=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $cv_stdarg" >&5
+echo "${ECHO_T}$cv_stdarg" >&6
+if test "$cv_stdarg" = yes; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_STDARG_PROTOTYPES 1
+_ACEOF
+
+fi
+
+
+
+ac_config_files="$ac_config_files Makefile"
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+{
+ (set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+} |
+ sed '
+ t clear
+ : clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ : end' >>confcache
+if cmp -s $cache_file confcache; then :; else
+ if test -w $cache_file; then
+ test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[ ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[ ]*$//;
+}'
+fi
+
+DEFS=-DHAVE_CONFIG_H
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+
+# NLS nuisances.
+# Support unset when possible.
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+(set +x; test -n "`(LANG=C; export LANG) 2>&1`") &&
+ { $as_unset LANG || test "${LANG+set}" != set; } ||
+ { LANG=C; export LANG; }
+(set +x; test -n "`(LC_ALL=C; export LC_ALL) 2>&1`") &&
+ { $as_unset LC_ALL || test "${LC_ALL+set}" != set; } ||
+ { LC_ALL=C; export LC_ALL; }
+(set +x; test -n "`(LC_TIME=C; export LC_TIME) 2>&1`") &&
+ { $as_unset LC_TIME || test "${LC_TIME+set}" != set; } ||
+ { LC_TIME=C; export LC_TIME; }
+(set +x; test -n "`(LC_CTYPE=C; export LC_CTYPE) 2>&1`") &&
+ { $as_unset LC_CTYPE || test "${LC_CTYPE+set}" != set; } ||
+ { LC_CTYPE=C; export LC_CTYPE; }
+(set +x; test -n "`(LANGUAGE=C; export LANGUAGE) 2>&1`") &&
+ { $as_unset LANGUAGE || test "${LANGUAGE+set}" != set; } ||
+ { LANGUAGE=C; export LANGUAGE; }
+(set +x; test -n "`(LC_COLLATE=C; export LC_COLLATE) 2>&1`") &&
+ { $as_unset LC_COLLATE || test "${LC_COLLATE+set}" != set; } ||
+ { LC_COLLATE=C; export LC_COLLATE; }
+(set +x; test -n "`(LC_NUMERIC=C; export LC_NUMERIC) 2>&1`") &&
+ { $as_unset LC_NUMERIC || test "${LC_NUMERIC+set}" != set; } ||
+ { LC_NUMERIC=C; export LC_NUMERIC; }
+(set +x; test -n "`(LC_MESSAGES=C; export LC_MESSAGES) 2>&1`") &&
+ { $as_unset LC_MESSAGES || test "${LC_MESSAGES+set}" != set; } ||
+ { LC_MESSAGES=C; export LC_MESSAGES; }
+
+
+# Name of the executable.
+as_me=`(basename "$0") 2>/dev/null ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conftest.sh
+ echo "exit 0" >>conftest.sh
+ chmod +x conftest.sh
+ if (PATH=".;."; conftest.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conftest.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH || test "${CDPATH+set}" != set || { CDPATH=$PATH_SEPARATOR; export CDPATH; }
+
+exec 6>&1
+
+# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling. Logging --version etc. is OK.
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.53. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+_ACEOF
+
+# Files that config.status was made for.
+if test -n "$ac_config_files"; then
+ echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_headers"; then
+ echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_links"; then
+ echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_commands"; then
+ echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number, then exit
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Report bugs to <bug-autoconf@gnu.org>."
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.53,
+ with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001
+Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=$srcdir
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no file are specified by the user, then we need to provide default
+# value. By we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=*)
+ ac_option=`expr "x$1" : 'x\([^=]*\)='`
+ ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+ shift
+ set dummy "$ac_option" "$ac_optarg" ${1+"$@"}
+ shift
+ ;;
+ -*);;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_need_defaults=false;;
+ esac
+
+ case $1 in
+ # Handling of the options.
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ echo "running $SHELL $0 " $ac_configure_args " --no-create --no-recursion"
+ exec $SHELL $0 $ac_configure_args --no-create --no-recursion ;;
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+ --version | --vers* | -V )
+ echo "$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; };;
+ --help | --hel | -h )
+ echo "$ac_cs_usage"; exit 0 ;;
+ --debug | --d* | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ shift
+ CONFIG_FILES="$CONFIG_FILES $1"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ shift
+ CONFIG_HEADERS="$CONFIG_HEADERS $1"
+ ac_need_defaults=false;;
+
+ # This is an error.
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; } ;;
+
+ *) ac_config_targets="$ac_config_targets $1" ;;
+
+ esac
+ shift
+done
+
+_ACEOF
+
+
+
+
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_config_target in $ac_config_targets
+do
+ case "$ac_config_target" in
+ # Handling of arguments.
+ "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+ "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+fi
+
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+ trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files: $tmp is set by mktemp -d, else by mkdir.
+: ${TMPDIR=/tmp}
+{
+ tmp=`(umask 077 && mktemp -d -q "$TMPDIR/csXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+} ||
+{
+ tmp=$TMPDIR/cs$$-$RANDOM
+ (umask 077 && mkdir $tmp)
+} ||
+{
+ echo "$as_me: cannot create a temporary directory in $TMPDIR" >&2
+ { (exit 1); exit 1; }
+}
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
+ s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
+s,@SHELL@,$SHELL,;t t
+s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
+s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
+s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
+s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
+s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
+s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
+s,@exec_prefix@,$exec_prefix,;t t
+s,@prefix@,$prefix,;t t
+s,@program_transform_name@,$program_transform_name,;t t
+s,@bindir@,$bindir,;t t
+s,@sbindir@,$sbindir,;t t
+s,@libexecdir@,$libexecdir,;t t
+s,@datadir@,$datadir,;t t
+s,@sysconfdir@,$sysconfdir,;t t
+s,@sharedstatedir@,$sharedstatedir,;t t
+s,@localstatedir@,$localstatedir,;t t
+s,@libdir@,$libdir,;t t
+s,@includedir@,$includedir,;t t
+s,@oldincludedir@,$oldincludedir,;t t
+s,@infodir@,$infodir,;t t
+s,@mandir@,$mandir,;t t
+s,@build_alias@,$build_alias,;t t
+s,@host_alias@,$host_alias,;t t
+s,@target_alias@,$target_alias,;t t
+s,@DEFS@,$DEFS,;t t
+s,@ECHO_C@,$ECHO_C,;t t
+s,@ECHO_N@,$ECHO_N,;t t
+s,@ECHO_T@,$ECHO_T,;t t
+s,@LIBS@,$LIBS,;t t
+s,@RUBYDIR@,$RUBYDIR,;t t
+s,@STATISTICS@,$STATISTICS,;t t
+s,@CC@,$CC,;t t
+s,@CFLAGS@,$CFLAGS,;t t
+s,@LDFLAGS@,$LDFLAGS,;t t
+s,@CPPFLAGS@,$CPPFLAGS,;t t
+s,@ac_ct_CC@,$ac_ct_CC,;t t
+s,@EXEEXT@,$EXEEXT,;t t
+s,@OBJEXT@,$OBJEXT,;t t
+s,@RANLIB@,$RANLIB,;t t
+s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t
+s,@CPP@,$CPP,;t t
+s,@ALLOCA@,$ALLOCA,;t t
+s,@LIBOBJS@,$LIBOBJS,;t t
+s,@@,$,;t t
+CEOF
+
+_ACEOF
+
+ cat >>$CONFIG_STATUS <<\_ACEOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ else
+ sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ fi
+ if test ! -s $tmp/subs.frag; then
+ ac_more_lines=false
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { case "$ac_dir" in
+ [\\/]* | ?:[\\/]* ) as_incr_dir=;;
+ *) as_incr_dir=.;;
+esac
+as_dummy="$ac_dir"
+for as_mkdir_dir in `IFS='/\\'; set X $as_dummy; shift; echo "$@"`; do
+ case $as_mkdir_dir in
+ # Skip DOS drivespec
+ ?:) as_incr_dir=$as_mkdir_dir ;;
+ *)
+ as_incr_dir=$as_incr_dir/$as_mkdir_dir
+ test -d "$as_incr_dir" ||
+ mkdir "$as_incr_dir" ||
+ { { echo "$as_me:$LINENO: error: cannot create \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }
+ ;;
+ esac
+done; }
+
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
+# absolute.
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
+ac_abs_top_builddir=`cd "$ac_dir" && cd $ac_top_builddir && pwd`
+ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
+ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
+
+
+
+ if test x"$ac_file" != x-; then
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+ rm -f "$ac_file"
+ fi
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ configure_input=
+ else
+ configure_input="$ac_file. "
+ fi
+ configure_input=$configure_input"Generated from `echo $ac_file_in |
+ sed 's,.*/,,'` by configure."
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo $f;;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo $f
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo $srcdir/$f
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s,@configure_input@,$configure_input,;t t
+s,@srcdir@,$ac_srcdir,;t t
+s,@abs_srcdir@,$ac_abs_srcdir,;t t
+s,@top_srcdir@,$ac_top_srcdir,;t t
+s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s,@builddir@,$ac_builddir,;t t
+s,@abs_builddir@,$ac_abs_builddir,;t t
+s,@top_builddir@,$ac_top_builddir,;t t
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+ rm -f $tmp/stdin
+ if test x"$ac_file" != x-; then
+ mv $tmp/out $ac_file
+ else
+ cat $tmp/out
+ rm -f $tmp/out
+ fi
+
+done
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$,\1#\2'
+ac_dC=' '
+ac_dD=',;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$,\1#\2define\3'
+ac_uC=' '
+ac_uD=',;t'
+
+for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo $f;;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo $f
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo $srcdir/$f
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+ # Remove the trailing spaces.
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
+
+_ACEOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitute the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+# Using a here document instead of a string reduces the quoting nightmare.
+# Putting comments in sed scripts is not portable.
+#
+# `end' is used so that the second main sed command (meant for
+# 0-ary CPP macros) is not applied to n-ary macro definitions.
+# See the Autoconf documentation for `clear'.
+cat >confdef2sed.sed <<\_ACEOF
+s/[\\&,]/\\&/g
+s,[\\$`],\\&,g
+t clear
+: clear
+s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
+t end
+s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
+: end
+_ACEOF
+# If some macros were called several times, the same #define may appear
+# several times, which is useless. Nevertheless, we do not want to
+# sort them, since we want the *last* AC-DEFINE to be honored.
+uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
+sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f confdef2sed.sed
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\_ACEOF
+s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+_ACEOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if egrep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $tmp/defines.sed.
+ echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#define' lines.
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/defines.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # egrep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $tmp/undefs.sed.
+ echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#undef'
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/undefs.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ echo "/* Generated by configure. */" >$tmp/config.h
+ else
+ echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
+ fi
+ cat $tmp/in >>$tmp/config.h
+ rm -f $tmp/in
+ if test x"$ac_file" != x-; then
+ if cmp -s $ac_file $tmp/config.h 2>/dev/null; then
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { case "$ac_dir" in
+ [\\/]* | ?:[\\/]* ) as_incr_dir=;;
+ *) as_incr_dir=.;;
+esac
+as_dummy="$ac_dir"
+for as_mkdir_dir in `IFS='/\\'; set X $as_dummy; shift; echo "$@"`; do
+ case $as_mkdir_dir in
+ # Skip DOS drivespec
+ ?:) as_incr_dir=$as_mkdir_dir ;;
+ *)
+ as_incr_dir=$as_incr_dir/$as_mkdir_dir
+ test -d "$as_incr_dir" ||
+ mkdir "$as_incr_dir" ||
+ { { echo "$as_me:$LINENO: error: cannot create \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }
+ ;;
+ esac
+done; }
+
+ rm -f $ac_file
+ mv $tmp/config.h $ac_file
+ fi
+ else
+ cat $tmp/config.h
+ rm -f $tmp/config.h
+ fi
+done
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || { (exit 1); exit 1; }
+fi
+
diff --git a/ext/mbstring/oniguruma/configure.in b/ext/mbstring/oniguruma/configure.in
deleted file mode 100644
index 84af3fbdb8..0000000000
--- a/ext/mbstring/oniguruma/configure.in
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl Process this file with autoconf to produce a configure script.
-AC_INIT(regex.c)
-
-AC_CONFIG_HEADER(config.h)
-
-dnl RUBYDIR defaults to ".."; --with-rubydir=DIR replaces it with the given value.
-RUBYDIR=".."
-AC_ARG_WITH(rubydir,
- [ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)],
- [ RUBYDIR=$withval ])
-AC_SUBST(RUBYDIR)
-
-dnl STATISTICS defaults to empty; giving --with-statistics sets it to -DREG_DEBUG_STATISTICS.
-STATISTICS=""
-AC_ARG_WITH(statistics,
- [ --with-statistics take matching time statistical data],
- [ STATISTICS=-DREG_DEBUG_STATISTICS ])
-AC_SUBST(STATISTICS)
-
-dnl Checks for programs (C compiler and ranlib).
-AC_PROG_CC
-AC_PROG_RANLIB
-dnl AC_PROG_INSTALL
-
-dnl Checks for libraries (none are checked).
-
-dnl Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h)
-
-dnl Checks for typedefs, structures, and compiler characteristics.
-AC_CHECK_SIZEOF(int, 4)
-AC_CHECK_SIZEOF(short, 2)
-AC_CHECK_SIZEOF(long, 4)
-AC_C_CONST
-AC_HEADER_TIME
-
-dnl Checks for library functions.
-AC_FUNC_ALLOCA
-AC_FUNC_MEMCMP
-
-AC_CACHE_CHECK(for prototypes, cv_have_prototypes,
- [AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);],
- cv_have_prototypes=yes,
- cv_have_prototypes=no)])
-if test "$cv_have_prototypes" = yes; then
- AC_DEFINE(HAVE_PROTOTYPES)
-fi
-
-AC_CACHE_CHECK(for variable length prototypes and stdarg.h, cv_stdarg,
- [AC_TRY_COMPILE([
-#include <stdarg.h>
-int foo(int x, ...) {
- va_list va;
- va_start(va, x);
- va_arg(va, int);
- va_arg(va, char *);
- va_arg(va, double);
- return 0;
-}
-], [return foo(10, "", 3.14);],
- cv_stdarg=yes,
- cv_stdarg=no)])
-if test "$cv_stdarg" = yes; then
- AC_DEFINE(HAVE_STDARG_PROTOTYPES)
-fi
-
-AC_SUBST()
-
-AC_OUTPUT(Makefile)
diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API
deleted file mode 100644
index 96f53ae9b8..0000000000
--- a/ext/mbstring/oniguruma/doc/API
+++ /dev/null
@@ -1,279 +0,0 @@
-Oniguruma API 2003/07/04
-
-declared in regex.h.
-
-
-# int regex_init(void)
-
- Initialize library.
-
- You don't have to call it explicitly, because it is called in regex_new().
-
-
-# int regex_error_code_to_str(UChar* err_buf, int err_code, ...)
-
- Return error message string length.
-
- arguments
- 1 err_buf: error message buffer.
- (required size: REG_MAX_ERROR_MESSAGE_LEN)
- 2 err_code: error code returned from other API functions.
- 3 err_info (optional): error info returned from regex_new()
- and regex_recompile().
-
-
-# int regex_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
- RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
- RegErrorInfo* err_info)
-
- Create new regex object(regex_t).
-
- normal return: REG_NORMAL
-
- arguments
- 1 reg: return regex object's address.
- 2 pattern: regex pattern string.
- 3 pattern_end: terminate address of pattern. (pattern + pattern length)
- 4 option: compile time options.
-
- REG_OPTION_NONE no option
- REG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
- REG_OPTION_MULTILINE '.' match with newline
- REG_OPTION_IGNORECASE ignore case (case-insensitive)
- REG_OPTION_EXTEND extended pattern form
- REG_OPTION_FIND_LONGEST find longest match
- REG_OPTION_FIND_NOT_EMPTY ignore empty match
- REG_OPTION_NEGATE_SINGLELINE
- clear REG_OPTION_SINGLELINE which is default on
- in REG_SYNTAX_POSIX_XXX, REG_SYNTAX_PERL and REG_SYNTAX_JAVA.
- REG_OPTION_CAPTURE_ONLY_NAMED_GROUP named group only captured.
-
- 5 code: character encoding.
-
- REGCODE_ASCII ASCII
- REGCODE_UTF8 UTF-8
- REGCODE_EUCJP EUC-JP
- REGCODE_SJIS Shift_JIS
- REGCODE_DEFAULT ASCII
-
- 6 syntax: pointer to pattern syntax definition.
-
- REG_SYNTAX_POSIX_BASIC POSIX Basic RE
- REG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
- REG_SYNTAX_EMACS Emacs
- REG_SYNTAX_GREP grep
- REG_SYNTAX_GNU_REGEX GNU regex
- REG_SYNTAX_JAVA Java (Sun java.util.regex)
- REG_SYNTAX_PERL Perl
- REG_SYNTAX_RUBY Ruby
- REG_SYNTAX_DEFAULT default (== Ruby)
- regex_set_default_syntax()
-
- or any RegSyntaxType data pointer defined by user.
-
- 7 err_info: address for return optional error info.
- use this value as 3rd argument of regex_error_code_to_str().
-
-
-# void regex_free(regex_t* reg)
-
- Free memory used by regex object.
-
- arguments
- 1 reg: regex object.
-
-
-# int regex_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
- RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
- RegErrorInfo* err_info)
-
- Recompile regex object.
-
- normal return: REG_NORMAL
-
- arguments
- 1 reg: regex object.
-
- Another arguments are same with regex_new().
-
-
-# int regex_search(regex_t* reg, UChar* str, UChar* end, UChar* start,
- UChar* range, RegRegion* region, RegOptionType option)
-
- Search string and return search result and matching region.
-
- normal return: match position offset (i.e. p - str >= 0)
- not found: REG_MISMATCH (< 0)
-
- arguments
- 1 reg: regex object
- 2 str: target string
- 3 end: terminate address of target string
- 4 start: search start address of target string
- 5 range: search terminate address of target string
- 6 region: address for return group match range info (NULL is allowed)
- 7 option: search time option
-
- REG_OPTION_NOTBOL string head(str) isn't considered as begin of line
- REG_OPTION_NOTEOL string end (end) isn't considered as end of line
- REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
-
-
-# int regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at,
- RegRegion* region, RegOptionType option)
-
- Match string and return result and matching region.
-
- normal return: match length (i.e. p - at >= 0)
- not match: REG_MISMATCH (< 0)
-
- arguments
- 1 reg: regex object
- 2 str: target string
- 3 end: terminate address of target string
- 4 at: match address of target string
- 5 region: address for return group match range info (NULL is allowed)
- 6 option: search time option
-
- REG_OPTION_NOTBOL string head(str) isn't considered as begin of line
- REG_OPTION_NOTEOL string end (end) isn't considered as end of line
- REG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
-
-
-# RegRegion* regex_region_new(void)
-
- Create a region.
-
-
-# void regex_region_free(RegRegion* region, int free_self)
-
- Free memory used by region.
-
- arguments
- 1 region: target region
- 2 free_self: [1: free all, 0: free memory used in region but not self]
-
-
-# void regex_region_copy(RegRegion* to, RegRegion* from)
-
- Copy contents of region.
-
- arguments
- 1 to: target region
- 2 from: source region
-
-
-# void regex_region_clear(RegRegion* region)
-
- Clear contents of region.
-
- arguments
- 1 region: target region
-
-
-# int regex_region_resize(RegRegion* region, int n)
-
- Resize group range area of region.
-
- normal return: REG_NORMAL
-
- arguments
- 1 region: target region
- 2 n: new size
-
-
-# int regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
- int** num_list)
-
- Return group number list of name.
- Named subexp is defined by (?<name>....).
-
- normal return: number of groups for the name.
- (ex. /(?<x>..)...(?<x>..)/ ==> 2)
- name not found: -1
-
- arguments
- 1 reg: regex object.
- 2 name: subexp-name.
- 3 name_end: terminate address of subexp-name.
- 4 num_list: return list of group number.
-
-
-# int regex_foreach_names(regex_t* reg, int (*func)(UChar*,int,int*,void*),
- void* arg)
-
- Iterate function call for all names.
-
- normal return: 0
- error: func's return value.
-
- arguments
- 1 reg: regex object.
- 2 func: called function.
- func(name, <number of groups>, <group number's list>, arg);
- if func return non 0 value, iteration is stopped.
- 3 arg: argument for func.
-
-
-# UChar* regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s)
-
- Return previous character head address.
-
- arguments
- 1 code: character encoding
- 2 start: string address
- 3 s: target address of string
-
-
-# UChar* regex_get_left_adjust_char_head(RegCharEncoding code,
- UChar* start, UChar* s)
-
- Return left-adjusted head address of a character.
-
- arguments
- 1 code: character encoding
- 2 start: string address
- 3 s: target address of string
-
-
-# UChar* regex_get_right_adjust_char_head(RegCharEncoding code,
- UChar* start, UChar* s)
-
- Return right-adjusted head address of a character.
-
- arguments
- 1 code: character encoding
- 2 start: string address
- 3 s: target address of string
-
-
-# int regex_set_default_syntax(RegSyntaxType* syntax)
-
- Set default syntax.
-
- arguments
- 1 syntax: pointer to pattern syntax definition.
-
-
-# void regex_set_default_trans_table(UChar* table)
-
- Set default case transformation table.
-
- arguments
- 1 table: case transformation table
-
- (* this function will be obsoleted in future version)
-
-
-# int regex_end(void)
-
- The use of this library is finished.
-
- normal return: REG_NORMAL
-
-
-# const char* regex_version(void)
-
- Return version string. (ex. "1.8.6")
-
-// END
diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE
deleted file mode 100644
index 3527b4556f..0000000000
--- a/ext/mbstring/oniguruma/doc/RE
+++ /dev/null
@@ -1,224 +0,0 @@
-Oniguruma Regular Expressions 2003/07/04
-
-syntax: REG_SYNTAX_RUBY (default)
-
-
-1. Syntax elements
-
- \ escape
- | alternation
- (...) group
- [...] character class
-
-
-2. Characters
-
- \t horizontal tab (0x09)
- \v vertical tab (0x0B)
- \n newline (0x0A)
- \r return (0x0D)
- \b back space (0x08) (* in character class only)
- \f form feed (0x0C)
- \a bell (0x07)
- \e escape (0x1B)
- \nnn octal char
- \xHH hexadecimal char
- \x{7HHHHHHH} wide hexadecimal char
- \cx control char
- \C-x control char
- \M-x meta (x|0x80)
- \M-\C-x meta control char
-
-
-3. Character types
-
- . any character (except newline)
- \w word character (alphanumeric, "_" and multibyte char)
- \W non-word char
- \s whitespace char (\t, \n, \v, \f, \r, \x20)
- \S non-whitespace char
- \d digit char
- \D non-digit char
-
-
-4. Quantifier
-
- greedy
-
- ? 1 or 0 times
- * 0 or more times
- + 1 or more times
- {n,m} at least n but not more than m times
- {n,} at least n times
- {n} n times
-
- reluctant
-
- ?? 1 or 0 times
- *? 0 or more times
- +? 1 or more times
- {n,m}? at least n but not more than m times
- {n,}? at least n times
-
- possessive (greedy and does not backtrack after repeated)
-
- ?+ 1 or 0 times
- *+ 0 or more times
- ++ 1 or more times
-
-
-5. Anchors
-
- ^ beginning of the line
- $ end of the line
- \b word boundary
- \B not word boundary
- \A beginning of string
- \Z end of string, or before newline at the end
- \z end of string
- \G previous end-of-match position
-
-
-6. POSIX character class ([:xxxxx:], negate [:^xxxxx:])
-
- alnum alphabet or digit char
- alpha alphabet
- ascii code value: [0 - 127]
- blank \t, \x20
- cntrl
- digit 0-9
- graph
- lower
- print
- punct
- space \t, \n, \v, \f, \r, \x20
- upper
- xdigit 0-9, a-f, A-F
-
-
-7. Operators in character class
-
- [...] group (character class in character class)
- && intersection
- (lowest precedence operator in character class)
-
- ex. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
-
-
-8. Extended expressions
-
- (?#...) comment
- (?imx-imx) option on/off
- i: ignore case
- m: multi-line (dot(.) match newline)
- x: extended form
- (?imx-imx:subexp) option on/off for subexp
- (?:subexp) not captured
- (?=subexp) look-ahead
- (?!subexp) negative look-ahead
- (?<=subexp) look-behind
- (?<!subexp) negative look-behind
-
- Subexp of look-behind must be fixed character length.
- But different character length is allowed in top level
- alternatives only.
- ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
-
- (?>subexp) don't backtrack
- (?<name>subexp) define named group
- (name can not include '>', ')', '\' and NUL character)
-
-
-9. Back reference
-
- \n back reference by group number (n >= 1)
- \k<name> back reference by group name
-
-
-10. Subexp call ("Tanaka Akira special")
-
- \g<name> call by group name
- \g<n> call by group number (only if 'n' is not defined as name)
-
-
------------------------------
-11. Original extensions
-
- + named group (?<name>...)
- + named backref \k<name>
- + subexp call \g<name>, \g<group-num>
-
-
-12. Lacked features compare with perl 5.8.0
-
- + [:word:]
- + \N{name}
- + \l,\u,\L,\U, \P, \X, \C
- + (?{code})
- + (??{code})
- + (?(condition)yes-pat|no-pat)
-
- + \Q...\E (* This is effective on REG_SYNTAX_PERL and REG_SYNTAX_JAVA)
-
-
-13. Syntax depend options
-
- + REG_SYNTAX_RUBY (default)
- (?m): dot(.) match newline
-
- + REG_SYNTAX_PERL, REG_SYNTAX_JAVA
- (?s): dot(.) match newline
- (?m): ^ match after newline, $ match before newline
-
-
-14. Differences with Japanized GNU regex(version 0.12) of Ruby
-
- + add look behind
- (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
- (in negative-look-behind, capture group isn't allowed,
- shy group(?:) is allowed.)
- + add possessive quantifier. ?+, *+, ++
- + add operations in character class. [], &&
- + add named group and subexp call.
- + octal or hexadecimal number sequence can be treated as
- a multibyte code char in char-class, if multibyte encoding is specified.
- (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
- + effect range of isolated option is to next ')'.
- ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
- + isolated option is not transparent to previous pattern.
- ex. a(?i)* is a syntax error pattern.
- + allowed incompleted left brace as an usual char.
- ex. /{/, /({)/, /a{2,3/ etc...
- + negative POSIX bracket [:^xxxx:] is supported.
- + POSIX bracket [:ascii:] is added.
- + repeat of look-ahead is not allowd.
- ex. /(?=a)*/, /(?!b){5}/
-
-
-14. Problems
-
- + Invalid first byte in UTF-8 is allowed.
- (which is the same as GNU regex of Ruby)
-
- /./u =~ "\xa3"
-
- Of course, although it is possible to validate,
- it will become later than now.
-
- + Zero-length match in infinite repeat stops the repeat,
- and captured group status isn't checked as stop condition.
-
- /()*\1/ =~ "" #=> match
- /(?:()|())*\1\2/ =~ "" #=> fail
-
- /(?:\1a|())*/ =~ "a" #=> match with ""
-
- + Ignore case option is not effect to an octal or hexadecimal
- numbered char, but it becomes effective if it appears in the char class.
- This doesn't have consistency, though they are the specifications
- which are the same as GNU regex of Ruby.
-
- /\x61/i.match("A") # => nil
- /[\x61]/i.match("A") # => match
-
-// END
diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h
new file mode 100644
index 0000000000..4c029304b6
--- /dev/null
+++ b/ext/mbstring/oniguruma/onigcmpt200.h
@@ -0,0 +1,304 @@
+/**********************************************************************
+
+ onigcmpt200.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef ONIGCMPT200_H
+#define ONIGCMPT200_H
+
+/* constants */
+#define REG_MAX_ERROR_MESSAGE_LEN ONIG_MAX_ERROR_MESSAGE_LEN
+
+#define RegCharEncoding OnigEncoding
+
+#define REG_ENCODING_ASCII ONIG_ENCODING_ASCII
+#define REG_ENCODING_ISO_8859_1 ONIG_ENCODING_ISO_8859_1
+#define REG_ENCODING_ISO_8859_15 ONIG_ENCODING_ISO_8859_15
+#define REG_ENCODING_UTF8 ONIG_ENCODING_UTF8
+#define REG_ENCODING_EUC_JP ONIG_ENCODING_EUC_JP
+#define REG_ENCODING_SJIS ONIG_ENCODING_SJIS
+#define REG_ENCODING_BIG5 ONIG_ENCODING_BIG5
+#define REG_ENCODING_UNDEF ONIG_ENCODING_UNDEF
+
+/* REGCODE_XXXX names are obsolete; don't use them. */
+#define REGCODE_UNDEF REG_ENCODING_UNDEF
+#define REGCODE_ASCII REG_ENCODING_ASCII
+#define REGCODE_UTF8 REG_ENCODING_UTF8
+#define REGCODE_EUCJP REG_ENCODING_EUC_JP
+#define REGCODE_SJIS REG_ENCODING_SJIS
+
+typedef unsigned char* RegTransTableType;
+#define RegOptionType OnigOptionType
+#define RegDistance OnigDistance
+
+#define REG_OPTION_DEFAULT ONIG_OPTION_DEFAULT
+
+/* options */
+#define REG_OPTION_NONE ONIG_OPTION_NONE
+#define REG_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define REG_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define REG_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define REG_OPTION_EXTEND ONIG_OPTION_EXTEND
+#define REG_OPTION_FIND_LONGEST ONIG_OPTION_FIND_LONGEST
+#define REG_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
+#define REG_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
+#define REG_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
+#define REG_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
+#define REG_OPTION_NOTBOL ONIG_OPTION_NOTBOL
+#define REG_OPTION_NOTEOL ONIG_OPTION_NOTEOL
+#define REG_OPTION_POSIX_REGION ONIG_OPTION_POSIX_REGION
+
+#define REG_OPTION_ON ONIG_OPTION_ON
+#define REG_OPTION_OFF ONIG_OPTION_OFF
+#define IS_REG_OPTION_ON ONIG_IS_OPTION_ON
+
+/* syntax */
+#define RegSyntaxType OnigSyntaxType
+
+#define RegSyntaxPosixBasic OnigSyntaxPosixBasic
+#define RegSyntaxPosixExtended OnigSyntaxPosixExtended
+#define RegSyntaxEmacs OnigSyntaxEmacs
+#define RegSyntaxGrep OnigSyntaxGrep
+#define RegSyntaxGnuRegex OnigSyntaxGnuRegex
+#define RegSyntaxJava OnigSyntaxJava
+#define RegSyntaxPerl OnigSyntaxPerl
+#define RegSyntaxRuby OnigSyntaxRuby
+
+#define REG_SYNTAX_POSIX_BASIC ONIG_SYNTAX_POSIX_BASIC
+#define REG_SYNTAX_POSIX_EXTENDED ONIG_SYNTAX_POSIX_EXTENDED
+#define REG_SYNTAX_EMACS ONIG_SYNTAX_EMACS
+#define REG_SYNTAX_GREP ONIG_SYNTAX_GREP
+#define REG_SYNTAX_GNU_REGEX ONIG_SYNTAX_GNU_REGEX
+#define REG_SYNTAX_JAVA ONIG_SYNTAX_JAVA
+#define REG_SYNTAX_PERL ONIG_SYNTAX_PERL
+#define REG_SYNTAX_RUBY ONIG_SYNTAX_RUBY
+
+#define REG_SYNTAX_DEFAULT ONIG_SYNTAX_DEFAULT
+#define RegDefaultSyntax OnigDefaultSyntax
+
+/* syntax (operators) */
+#define REG_SYN_OP_VARIABLE_META_CHARACTERS \
+ ONIG_SYN_OP_VARIABLE_META_CHARACTERS
+#define REG_SYN_OP_DOT_ANYCHAR \
+ ONIG_SYN_OP_DOT_ANYCHAR
+#define REG_SYN_OP_ASTERISK_ZERO_INF \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF
+#define REG_SYN_OP_ESC_ASTERISK_ZERO_INF \
+ ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
+#define REG_SYN_OP_PLUS_ONE_INF \
+ ONIG_SYN_OP_PLUS_ONE_INF
+#define REG_SYN_OP_ESC_PLUS_ONE_INF \
+ ONIG_SYN_OP_ESC_PLUS_ONE_INF
+#define REG_SYN_OP_QMARK_ZERO_ONE \
+ ONIG_SYN_OP_QMARK_ZERO_ONE
+#define REG_SYN_OP_ESC_QMARK_ZERO_ONE \
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
+#define REG_SYN_OP_BRACE_INTERVAL \
+ ONIG_SYN_OP_BRACE_INTERVAL
+#define REG_SYN_OP_ESC_BRACE_INTERVAL \
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL
+#define REG_SYN_OP_VBAR_ALT \
+ ONIG_SYN_OP_VBAR_ALT
+#define REG_SYN_OP_ESC_VBAR_ALT \
+ ONIG_SYN_OP_ESC_VBAR_ALT
+#define REG_SYN_OP_LPAREN_SUBEXP \
+ ONIG_SYN_OP_LPAREN_SUBEXP
+#define REG_SYN_OP_ESC_LPAREN_SUBEXP \
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP
+#define REG_SYN_OP_ESC_AZ_BUF_ANCHOR \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
+#define REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR \
+ ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
+#define REG_SYN_OP_DECIMAL_BACKREF \
+ ONIG_SYN_OP_DECIMAL_BACKREF
+#define REG_SYN_OP_BRACKET_CC \
+ ONIG_SYN_OP_BRACKET_CC
+#define REG_SYN_OP_ESC_W_WORD \
+ ONIG_SYN_OP_ESC_W_WORD
+#define REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END \
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
+#define REG_SYN_OP_ESC_B_WORD_BOUND \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND
+#define REG_SYN_OP_ESC_S_WHITE_SPACE \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE
+#define REG_SYN_OP_ESC_D_DIGIT \
+ ONIG_SYN_OP_ESC_D_DIGIT
+#define REG_SYN_OP_LINE_ANCHOR \
+ ONIG_SYN_OP_LINE_ANCHOR
+#define REG_SYN_OP_POSIX_BRACKET \
+ ONIG_SYN_OP_POSIX_BRACKET
+#define REG_SYN_OP_QMARK_NON_GREEDY \
+ ONIG_SYN_OP_QMARK_NON_GREEDY
+#define REG_SYN_OP_ESC_CONTROL_CHARS \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS
+#define REG_SYN_OP_ESC_C_CONTROL \
+ ONIG_SYN_OP_ESC_C_CONTROL
+#define REG_SYN_OP_ESC_OCTAL3 \
+ ONIG_SYN_OP_ESC_OCTAL3
+#define REG_SYN_OP_ESC_X_HEX2 \
+ ONIG_SYN_OP_ESC_X_HEX2
+#define REG_SYN_OP_ESC_X_BRACE_HEX8 \
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8
+
+#define REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE \
+ ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
+#define REG_SYN_OP2_QMARK_GROUP_EFFECT \
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT
+#define REG_SYN_OP2_OPTION_PERL \
+ ONIG_SYN_OP2_OPTION_PERL
+#define REG_SYN_OP2_OPTION_RUBY \
+ ONIG_SYN_OP2_OPTION_RUBY
+#define REG_SYN_OP2_PLUS_POSSESSIVE_REPEAT \
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
+#define REG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL \
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
+#define REG_SYN_OP2_CCLASS_SET_OP \
+ ONIG_SYN_OP2_CCLASS_SET_OP
+#define REG_SYN_OP2_QMARK_LT_NAMED_GROUP \
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
+#define REG_SYN_OP2_ESC_K_NAMED_BACKREF \
+ ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
+#define REG_SYN_OP2_ESC_G_SUBEXP_CALL \
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
+#define REG_SYN_OP2_ATMARK_CAPTURE_HISTORY \
+ ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
+#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL \
+ ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
+#define REG_SYN_OP2_ESC_CAPITAL_M_BAR_META \
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
+#define REG_SYN_OP2_ESC_V_VTAB \
+ ONIG_SYN_OP2_ESC_V_VTAB
+#define REG_SYN_OP2_ESC_U_HEX4 \
+ ONIG_SYN_OP2_ESC_U_HEX4
+#define REG_SYN_OP2_ESC_GNU_BUF_ANCHOR \
+ ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+
+#define REG_SYN_CONTEXT_INDEP_ANCHORS \
+ ONIG_SYN_CONTEXT_INDEP_ANCHORS
+#define REG_SYN_CONTEXT_INDEP_REPEAT_OPS \
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
+#define REG_SYN_CONTEXT_INVALID_REPEAT_OPS \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
+#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP \
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
+#define REG_SYN_ALLOW_INVALID_INTERVAL \
+ ONIG_SYN_ALLOW_INVALID_INTERVAL
+#define REG_SYN_STRICT_CHECK_BACKREF \
+ ONIG_SYN_STRICT_CHECK_BACKREF
+#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND \
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
+#define REG_SYN_CAPTURE_ONLY_NAMED_GROUP \
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
+#define REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME \
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
+
+#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC \
+ ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
+#define REG_SYN_BACKSLASH_ESCAPE_IN_CC \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
+#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC \
+ ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+#define REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC \
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
+#define REG_SYN_WARN_CC_OP_NOT_ESCAPED \
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
+#define REG_SYN_WARN_REDUNDANT_NESTED_REPEAT \
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
+
+/* meta character specifiers (regex_set_meta_char()) */
+#define REG_META_CHAR_ESCAPE ONIG_META_CHAR_ESCAPE
+#define REG_META_CHAR_ANYCHAR ONIG_META_CHAR_ANYCHAR
+#define REG_META_CHAR_ANYTIME ONIG_META_CHAR_ANYTIME
+#define REG_META_CHAR_ZERO_OR_ONE_TIME ONIG_META_CHAR_ZERO_OR_ONE_TIME
+#define REG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ONE_OR_MORE_TIME
+#define REG_META_CHAR_ANYCHAR_ANYTIME ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+#define REG_INEFFECTIVE_META_CHAR ONIG_INEFFECTIVE_META_CHAR
+
+/* error codes */
+#define REG_IS_PATTERN_ERROR ONIG_IS_PATTERN_ERROR
+/* normal return */
+#define REG_NORMAL ONIG_NORMAL
+#define REG_MISMATCH ONIG_MISMATCH
+#define REG_NO_SUPPORT_CONFIG ONIG_NO_SUPPORT_CONFIG
+/* internal error */
+#define REGERR_MEMORY ONIGERR_MEMORY
+#define REGERR_MATCH_STACK_LIMIT_OVER ONIGERR_MATCH_STACK_LIMIT_OVER
+#define REGERR_TYPE_BUG ONIGERR_TYPE_BUG
+#define REGERR_PARSER_BUG ONIGERR_PARSER_BUG
+#define REGERR_STACK_BUG ONIGERR_STACK_BUG
+#define REGERR_UNDEFINED_BYTECODE ONIGERR_UNDEFINED_BYTECODE
+#define REGERR_UNEXPECTED_BYTECODE ONIGERR_UNEXPECTED_BYTECODE
+#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED \
+ ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
+#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR \
+ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR
+/* general error */
+#define REGERR_INVALID_ARGUMENT ONIGERR_INVALID_ARGUMENT
+/* errors related to thread */
+#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT \
+ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (sizeof(unsigned int) * 8) */
+#define REG_MAX_CAPTURE_HISTORY_GROUP ONIG_MAX_CAPTURE_HISTORY_GROUP
+#define REG_IS_CAPTURE_HISTORY_GROUP ONIG_IS_CAPTURE_HISTORY_GROUP
+
+#define REG_REGION_NOTPOS ONIG_REGION_NOTPOS
+
+#define RegRegion OnigRegion
+#define RegErrorInfo OnigErrorInfo
+#define RegRepeatRange OnigRepeatRange
+
+#define RegWarnFunc OnigWarnFunc
+#define regex_null_warn onig_null_warn
+#define REG_NULL_WARN ONIG_NULL_WARN
+
+/* regex_t state */
+#define REG_STATE_NORMAL ONIG_STATE_NORMAL
+#define REG_STATE_SEARCHING ONIG_STATE_SEARCHING
+#define REG_STATE_COMPILING ONIG_STATE_COMPILING
+#define REG_STATE_MODIFY ONIG_STATE_MODIFY
+
+#define REG_STATE ONIG_STATE
+
+/* Oniguruma Native API */
+#define regex_init onig_init
+#define regex_error_code_to_str onig_error_code_to_str
+#define regex_set_warn_func onig_set_warn_func
+#define regex_set_verb_warn_func onig_set_verb_warn_func
+#define regex_new onig_new
+#define regex_free onig_free
+#define regex_recompile onig_recompile
+#define regex_search onig_search
+#define regex_match onig_match
+#define regex_region_new onig_region_new
+#define regex_region_free onig_region_free
+#define regex_region_copy onig_region_copy
+#define regex_region_clear onig_region_clear
+#define regex_region_resize onig_region_resize
+#define regex_name_to_group_numbers onig_name_to_group_numbers
+#define regex_name_to_backref_number onig_name_to_backref_number
+#define regex_foreach_name onig_foreach_name
+#define regex_number_of_names onig_number_of_names
+#define regex_get_encoding onig_get_encoding
+#define regex_get_options onig_get_options
+#define regex_get_syntax onig_get_syntax
+#define regex_set_default_syntax onig_set_default_syntax
+#define regex_copy_syntax onig_copy_syntax
+#define regex_set_meta_char onig_set_meta_char
+#define regex_end onig_end
+#define regex_version onig_version
+
+/* encoding API */
+#define enc_get_prev_char_head onigenc_get_prev_char_head
+#define enc_get_left_adjust_char_head onigenc_get_left_adjust_char_head
+#define enc_get_right_adjust_char_head onigenc_get_right_adjust_char_head
+/* obsolete API */
+#define regex_get_prev_char_head onigenc_get_prev_char_head
+#define regex_get_left_adjust_char_head onigenc_get_left_adjust_char_head
+#define regex_get_right_adjust_char_head onigenc_get_right_adjust_char_head
+
+#endif /* ONIGCMPT200_H */
diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h
new file mode 100644
index 0000000000..d78dc18b11
--- /dev/null
+++ b/ext/mbstring/oniguruma/oniggnu.h
@@ -0,0 +1,77 @@
+/**********************************************************************
+
+ oniggnu.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef ONIGGNU_H
+#define ONIGGNU_H
+
+#include "oniguruma.h"
+
+#define MBCTYPE_ASCII 0
+#define MBCTYPE_EUC 1
+#define MBCTYPE_SJIS 2
+#define MBCTYPE_UTF8 3
+
+/* GNU regex options */
+#ifndef RE_NREGS
+#define RE_NREGS ONIG_NREGION
+#endif
+#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
+#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
+#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
+
+#ifdef RUBY_PLATFORM
+#define re_mbcinit ruby_re_mbcinit
+#define re_compile_pattern ruby_re_compile_pattern
+#define re_recompile_pattern ruby_re_recompile_pattern
+#define re_free_pattern ruby_re_free_pattern
+#define re_adjust_startpos ruby_re_adjust_startpos
+#define re_search ruby_re_search
+#define re_match ruby_re_match
+#define re_set_casetable ruby_re_set_casetable
+#define re_copy_registers ruby_re_copy_registers
+#define re_free_registers ruby_re_free_registers
+#define register_info_type ruby_register_info_type
+#define re_error_code_to_str ruby_error_code_to_str
+
+#define ruby_error_code_to_str onig_error_code_to_str
+#define ruby_re_copy_registers onig_region_copy
+#else
+#define re_error_code_to_str onig_error_code_to_str
+#define re_copy_registers onig_region_copy
+#endif
+
+#ifdef ONIG_RUBY_M17N
+ONIG_EXTERN
+void re_mbcinit P_((OnigEncoding));
+#else
+ONIG_EXTERN
+void re_mbcinit P_((int));
+#endif
+
+ONIG_EXTERN
+int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+void re_free_pattern P_((struct re_pattern_buffer*));
+ONIG_EXTERN
+int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
+ONIG_EXTERN
+int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
+ONIG_EXTERN
+int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
+ONIG_EXTERN
+void re_set_casetable P_((const char*));
+ONIG_EXTERN
+void re_free_registers P_((struct re_registers*));
+ONIG_EXTERN
+int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
+
+#endif /* ONIGGNU_H */
diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h
index ea93c6f9f7..3793ae6bd9 100644
--- a/ext/mbstring/oniguruma/onigposix.h
+++ b/ext/mbstring/oniguruma/onigposix.h
@@ -2,7 +2,7 @@
onigposix.h - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGPOSIX_H
@@ -13,7 +13,7 @@
#define REG_NEWLINE (1<<1)
#define REG_NOTBOL (1<<2)
#define REG_NOTEOL (1<<3)
-#define REG_EXTENDED (1<<4) /* if not setted, Basic Regular Expression */
+#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */
#define REG_NOSUB (1<<5)
/* POSIX error codes */
@@ -38,11 +38,10 @@
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
-/* These value must be same with MBCTYPE_XXXX in oniguruma.h.*/
-#define REG_ENCODING_ASCII 0
-#define REG_ENCODING_EUC_JP 1
-#define REG_ENCODING_SJIS 2
-#define REG_ENCODING_UTF8 3
+#define REG_POSIX_ENCODING_ASCII 0
+#define REG_POSIX_ENCODING_EUC_JP 1
+#define REG_POSIX_ENCODING_SJIS 2
+#define REG_POSIX_ENCODING_UTF8 3
#include <stdlib.h>
@@ -63,73 +62,75 @@ typedef struct {
#ifndef P_
-#ifdef __STDC__
+#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
-#ifndef REG_EXTERN
+#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
-#define REG_EXTERN extern __declspec(dllexport)
-#elif defined(IMPORT)
-#define REG_EXTERN extern __declspec(dllimport)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
-#ifndef REG_EXTERN
-#define REG_EXTERN extern
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
#endif
#ifndef ONIGURUMA_H
-typedef unsigned int RegOptionType;
+typedef unsigned int OnigOptionType;
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
- RegOptionType options; /* default option */
-} RegSyntaxType;
-
-REG_EXTERN RegSyntaxType RegSyntaxPosixBasic;
-REG_EXTERN RegSyntaxType RegSyntaxPosixExtended;
-REG_EXTERN RegSyntaxType RegSyntaxEmacs;
-REG_EXTERN RegSyntaxType RegSyntaxGrep;
-REG_EXTERN RegSyntaxType RegSyntaxGnuRegex;
-REG_EXTERN RegSyntaxType RegSyntaxJava;
-REG_EXTERN RegSyntaxType RegSyntaxPerl;
-REG_EXTERN RegSyntaxType RegSyntaxRuby;
-
-/* predefined syntaxes (see regcomp.c) */
-#define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic)
-#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended)
-#define REG_SYNTAX_EMACS (&RegSyntaxEmacs)
-#define REG_SYNTAX_GREP (&RegSyntaxGrep)
-#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex)
-#define REG_SYNTAX_JAVA (&RegSyntaxJava)
-#define REG_SYNTAX_PERL (&RegSyntaxPerl)
-#define REG_SYNTAX_RUBY (&RegSyntaxRuby)
+ OnigOptionType options; /* default option */
+} OnigSyntaxType;
+
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regparse.c) */
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
-#define REG_SYNTAX_DEFAULT RegDefaultSyntax
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
-REG_EXTERN RegSyntaxType* RegDefaultSyntax;
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
-REG_EXTERN int regex_set_default_syntax P_((RegSyntaxType* syntax));
+ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
+ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
#endif /* ONIGURUMA_H */
-REG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
-REG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
-REG_EXTERN void regfree P_((regex_t* reg));
-REG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
+ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
+ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
+ONIG_EXTERN void regfree P_((regex_t* reg));
+ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
/* extended API */
-REG_EXTERN void reg_set_encoding P_((int enc));
-REG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
-REG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,int,int*,void*), void* arg));
+ONIG_EXTERN void reg_set_encoding P_((int enc));
+ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
+ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), void* arg));
+ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
#endif /* ONIGPOSIX_H */
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h
index e5236a80a7..fd9e8f1700 100644
--- a/ext/mbstring/oniguruma/oniguruma.h
+++ b/ext/mbstring/oniguruma/oniguruma.h
@@ -2,7 +2,7 @@
oniguruma.h - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGURUMA_H
@@ -11,23 +11,12 @@
#include "php_compat.h"
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 1
-#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 1
-
-/* config parameters */
-#ifndef RE_NREGS
-#define RE_NREGS 10
-#endif
-#define REG_NREGION RE_NREGS
-#define REG_MAX_BACKREF_NUM 1000
-#define REG_MAX_REPEAT_NUM 100000
-#define REG_MAX_MULTI_BYTE_RANGES_NUM 1000
-/* constants */
-#define REG_MAX_ERROR_MESSAGE_LEN 90
+#define ONIGURUMA_VERSION_MAJOR 2
+#define ONIGURUMA_VERSION_MINOR 2
+#define ONIGURUMA_VERSION_TEENY 2
#ifndef P_
-#ifdef __STDC__
+#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
@@ -42,241 +31,543 @@
#endif
#endif
-#ifndef REG_EXTERN
+#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
-#define REG_EXTERN extern __declspec(dllexport)
-#elif defined(IMPORT)
-#define REG_EXTERN extern __declspec(dllimport)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
-#ifndef REG_EXTERN
-#define REG_EXTERN extern
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
#endif
-#define REG_CHAR_TABLE_SIZE 256
+/* PART: character encoding */
+
+typedef unsigned char UChar;
+typedef unsigned long OnigCodePoint;
+typedef unsigned int OnigDistance;
+
+#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+
+typedef struct {
+ OnigCodePoint from;
+ OnigCodePoint to;
+} OnigCodePointRange;
+
+#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
+typedef struct {
+ int target_num;
+ int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
+ UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
+} OnigEncFoldMatchInfo;
-#define REGCODE_UNDEF ((RegCharEncoding )0)
#if defined(RUBY_PLATFORM) && defined(M17N_H)
-#define REG_RUBY_M17N
-typedef m17n_encoding* RegCharEncoding;
-#define REGCODE_DEFAULT REGCODE_UNDEF
+
+#define ONIG_RUBY_M17N
+typedef m17n_encoding* OnigEncoding;
+
#else
-typedef const char* RegCharEncoding;
-#define MBCTYPE_ASCII 0
-#define MBCTYPE_EUC 1
-#define MBCTYPE_SJIS 2
-#define MBCTYPE_UTF8 3
-
-#define REGCODE_ASCII REG_MBLEN_TABLE[MBCTYPE_ASCII]
-#define REGCODE_UTF8 REG_MBLEN_TABLE[MBCTYPE_UTF8]
-#define REGCODE_EUCJP REG_MBLEN_TABLE[MBCTYPE_EUC]
-#define REGCODE_SJIS REG_MBLEN_TABLE[MBCTYPE_SJIS]
-#define REGCODE_DEFAULT REGCODE_ASCII
-
-REG_EXTERN const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE];
+
+typedef struct {
+ const char len_table[256];
+ const char* name;
+ int max_enc_len;
+ int is_fold_match;
+ int ctype_support_level; /* sb-only/full */
+ int is_continuous_sb_mb; /* code point is continuous from sb to mb */
+ OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
+ int (*code_to_mbclen)(OnigCodePoint code);
+ int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
+ int (*mbc_to_lower)(UChar* p, UChar* lower);
+ int (*mbc_is_case_ambig)(UChar* p);
+ int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
+ int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
+ UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
+ int (*is_allowed_reverse_match)(UChar* p, UChar* e);
+ int (*get_all_fold_match_code)(OnigCodePoint** codes);
+ int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
+} OnigEncodingType;
+
+typedef OnigEncodingType* OnigEncoding;
+
+ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
+ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+
+#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
+#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
+#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
+#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
+#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
+#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
+#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
+#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
+#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
+#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
+#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
+#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
+#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
+#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
+#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
+#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
+#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
+#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
+#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
+#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
+#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
+#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
+#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
+#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
+#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+
#endif /* else RUBY && M17N */
-REG_EXTERN RegCharEncoding RegDefaultCharEncoding;
+#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+
+
+/* work size */
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
+
+/* character types */
+#define ONIGENC_CTYPE_ALPHA (1<< 0)
+#define ONIGENC_CTYPE_BLANK (1<< 1)
+#define ONIGENC_CTYPE_CNTRL (1<< 2)
+#define ONIGENC_CTYPE_DIGIT (1<< 3)
+#define ONIGENC_CTYPE_GRAPH (1<< 4)
+#define ONIGENC_CTYPE_LOWER (1<< 5)
+#define ONIGENC_CTYPE_PRINT (1<< 6)
+#define ONIGENC_CTYPE_PUNCT (1<< 7)
+#define ONIGENC_CTYPE_SPACE (1<< 8)
+#define ONIGENC_CTYPE_UPPER (1<< 9)
+#define ONIGENC_CTYPE_XDIGIT (1<<10)
+#define ONIGENC_CTYPE_WORD (1<<11)
+#define ONIGENC_CTYPE_ASCII (1<<12)
+#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
+
+/* ctype support level */
+#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
+#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
+
+
+#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
+
+#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
+#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
+#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
+#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
+#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
+#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
+ (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
+#define ONIGENC_IS_MBC_WORD(enc,s,end) \
+ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
+
+
+#ifdef ONIG_RUBY_M17N
+
+#include <ctype.h> /* for isblank(), isgraph() */
+
+#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
+#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
+
+#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
+#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
+#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ onigenc_is_allowed_reverse_match(enc, s, end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ onigenc_get_left_adjust_char_head(enc, start, s)
+#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
+#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+ ONIG_NO_SUPPORT_CONFIG
+#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
+#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) \
+ (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
+ : ONIG_INFINITE_DISTANCE)
+#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
+
+#if 0
+#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
+#endif
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
+ onigenc_is_code_ctype(enc,code,ctype)
-#if defined(RUBY_PLATFORM) && !defined(M17N_H)
-#undef ismbchar
-#define ismbchar(c) (mbclen((c)) != 1)
-#define mbclen(c) RegDefaultCharEncoding[(unsigned char )(c)]
+#ifdef isblank
+# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
+#else
+# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
+#endif
+#ifdef isgraph
+# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
+#else
+# define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ (isprint((int )code) && !isspace((int )code))
#endif
-typedef unsigned int RegOptionType;
-typedef unsigned char* RegTransTableType;
-typedef unsigned int RegDistance;
-typedef unsigned char UChar;
+#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
+#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
+#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
+#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
+#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
+
+ONIG_EXTERN
+int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
+ONIG_EXTERN
+int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN
+int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
+ONIG_EXTERN
+int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
+ONIG_EXTERN
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
+
+#else /* ONIG_RUBY_M17N */
+
+#define ONIGENC_NAME(enc) ((enc)->name)
+
+#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
+#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
+
+#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
+#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
+#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ (enc)->is_allowed_reverse_match(s,end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ (enc)->left_adjust_char_head(start, s)
+#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
+ (enc)->get_all_fold_match_code(codes)
+#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
+ (enc)->get_fold_match_info(p,end,info)
+#define ONIGENC_STEP_BACK(enc,start,s,n) \
+ onigenc_step_back((enc),(start),(s),(n))
+
+#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(unsigned char )(byte)]) /* unsigned index: a signed char >= 0x80 must not become negative */
+#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
+#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
+
+#define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
+#define ONIGENC_IS_CODE_PRINT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
+#define ONIGENC_IS_CODE_LOWER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
+#define ONIGENC_IS_CODE_UPPER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
+#define ONIGENC_IS_CODE_SPACE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
+#define ONIGENC_IS_CODE_BLANK(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
+#define ONIGENC_IS_CODE_WORD(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
+
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+ (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
+
+ONIG_EXTERN
+UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
+
+#endif /* is not ONIG_RUBY_M17N */
+
+
+/* encoding API */
+ONIG_EXTERN
+int onigenc_init P_(());
+ONIG_EXTERN
+int onigenc_set_default_encoding P_((OnigEncoding enc));
+ONIG_EXTERN
+OnigEncoding onigenc_get_default_encoding P_(());
+ONIG_EXTERN
+void onigenc_set_default_caseconv_table P_((UChar* table));
+ONIG_EXTERN
+UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
+ONIG_EXTERN
+UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+ONIG_EXTERN
+UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+ONIG_EXTERN
+UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+
+
+
+/* PART: regular expression */
+
+/* config parameters */
+#define ONIG_NREGION 10
+#define ONIG_MAX_BACKREF_NUM 1000
+#define ONIG_MAX_REPEAT_NUM 100000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000
+/* constants */
+#define ONIG_MAX_ERROR_MESSAGE_LEN 90
+
+#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+#undef ismbchar
+#define ismbchar(c) (mbclen((c)) != 1)
+#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
+#endif
-#define REG_OPTION_DEFAULT REG_OPTION_NONE
+typedef unsigned int OnigOptionType;
-/* GNU regex options */
-#define RE_OPTION_IGNORECASE (1L)
-#define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE << 1)
-#define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED << 1)
-#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE << 1)
-#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
-#define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE << 1)
+#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* options */
-#define REG_OPTION_NONE 0
-#define REG_OPTION_SINGLELINE RE_OPTION_SINGLELINE
-#define REG_OPTION_MULTILINE RE_OPTION_MULTILINE
-#define REG_OPTION_IGNORECASE RE_OPTION_IGNORECASE
-#define REG_OPTION_EXTEND RE_OPTION_EXTENDED
-#define REG_OPTION_FIND_LONGEST RE_OPTION_LONGEST
-#define REG_OPTION_FIND_NOT_EMPTY (REG_OPTION_FIND_LONGEST << 1)
-#define REG_OPTION_NEGATE_SINGLELINE (REG_OPTION_FIND_NOT_EMPTY << 1)
-#define REG_OPTION_CAPTURE_ONLY_NAMED_GROUP (REG_OPTION_NEGATE_SINGLELINE << 1)
+#define ONIG_OPTION_NONE 0
+#define ONIG_OPTION_IGNORECASE 1L
+#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
+#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
+#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
+#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
+#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
+#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
/* options (search time) */
-#define REG_OPTION_NOTBOL (REG_OPTION_CAPTURE_ONLY_NAMED_GROUP << 1)
-#define REG_OPTION_NOTEOL (REG_OPTION_NOTBOL << 1)
-#define REG_OPTION_POSIX_REGION (REG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
+#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
+#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
-#define REG_OPTION_ON(options,regopt) ((options) |= (regopt))
-#define REG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
-#define IS_REG_OPTION_ON(options,option) ((options) & (option))
+#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
+#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
+#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
- RegOptionType options; /* default option */
-} RegSyntaxType;
-
-REG_EXTERN RegSyntaxType RegSyntaxPosixBasic;
-REG_EXTERN RegSyntaxType RegSyntaxPosixExtended;
-REG_EXTERN RegSyntaxType RegSyntaxEmacs;
-REG_EXTERN RegSyntaxType RegSyntaxGrep;
-REG_EXTERN RegSyntaxType RegSyntaxGnuRegex;
-REG_EXTERN RegSyntaxType RegSyntaxJava;
-REG_EXTERN RegSyntaxType RegSyntaxPerl;
-REG_EXTERN RegSyntaxType RegSyntaxRuby;
-
-/* predefined syntaxes (see regcomp.c) */
-#define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic)
-#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended)
-#define REG_SYNTAX_EMACS (&RegSyntaxEmacs)
-#define REG_SYNTAX_GREP (&RegSyntaxGrep)
-#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex)
-#define REG_SYNTAX_JAVA (&RegSyntaxJava)
-#define REG_SYNTAX_PERL (&RegSyntaxPerl)
-#define REG_SYNTAX_RUBY (&RegSyntaxRuby)
+ OnigOptionType options; /* default option */
+} OnigSyntaxType;
+
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regparse.c) */
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
-#define REG_SYNTAX_DEFAULT RegDefaultSyntax
-
-REG_EXTERN RegSyntaxType* RegDefaultSyntax;
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/* syntax (operators) */
-#define REG_SYN_OP_ANYCHAR 1 /* . */
-#define REG_SYN_OP_0INF (1<<1) /* * */
-#define REG_SYN_OP_ESC_0INF (1<<2)
-#define REG_SYN_OP_1INF (1<<3) /* + */
-#define REG_SYN_OP_ESC_1INF (1<<4)
-#define REG_SYN_OP_01 (1<<5) /* ? */
-#define REG_SYN_OP_ESC_01 (1<<6)
-#define REG_SYN_OP_INTERVAL (1<<7) /* {lower,upper} */
-#define REG_SYN_OP_ESC_INTERVAL (1<<8)
-#define REG_SYN_OP_ALT (1<<9) /* | */
-#define REG_SYN_OP_ESC_ALT (1<<10)
-#define REG_SYN_OP_SUBEXP (1<<11) /* (...) */
-#define REG_SYN_OP_ESC_SUBEXP (1<<12)
-#define REG_SYN_OP_ESC_BUF_ANCHOR (1<<13) /* \A, \Z, \z */
-#define REG_SYN_OP_ESC_GNU_BUF_ANCHOR (1<<14) /* \`, \' */
-#define REG_SYN_OP_BACK_REF (1<<15) /* \num */
-#define REG_SYN_OP_CC (1<<16) /* [...] */
-#define REG_SYN_OP_ESC_WORD (1<<17) /* \w, \W */
-#define REG_SYN_OP_ESC_WORD_BEGIN_END (1<<18) /* \<. \> */
-#define REG_SYN_OP_ESC_WORD_BOUND (1<<19) /* \b, \B */
-#define REG_SYN_OP_ESC_WHITE_SPACE (1<<20) /* \s, \S */
-#define REG_SYN_OP_ESC_DIGIT (1<<21) /* \d, \D */
-#define REG_SYN_OP_LINE_ANCHOR (1<<22) /* ^, $ */
-#define REG_SYN_OP_POSIX_BRACKET (1<<23) /* [:xxxx:] */
-#define REG_SYN_OP_NON_GREEDY (1<<24) /* ??,*?,+?,{n,m}? */
-#define REG_SYN_OP_ESC_CONTROL_CHAR (1<<25) /* \n,\r,\t,\a ... */
-#define REG_SYN_OP_ESC_C_CONTROL (1<<26) /* \cx */
-#define REG_SYN_OP_ESC_OCTAL3 (1<<27) /* \OOO */
-#define REG_SYN_OP_ESC_X_HEX2 (1<<28) /* \xHH */
-#define REG_SYN_OP_ESC_X_BRACE_HEX8 (1<<29) /* \x{7HHHHHHH} */
-#define REG_SYN_OP_SUBEXP_EFFECT (1<<30) /* (?...) */
-#define REG_SYN_OP_QUOTE (1<<31) /* \Q...\E */
-
-#define REG_SYN_OP2_OPTION_PERL (1<<0) /* (?imsx), (?-imsx) */
-#define REG_SYN_OP2_OPTION_RUBY (1<<1) /* (?imx), (?-imx) */
-#define REG_SYN_OP2_POSSESSIVE_REPEAT (1<<2) /* ?+,*+,++ */
-#define REG_SYN_OP2_POSSESSIVE_INTERVAL (1<<3) /* {n,m}+ */
-#define REG_SYN_OP2_CCLASS_SET (1<<4) /* [...&&..[..].] */
-#define REG_SYN_OP2_NAMED_SUBEXP (1<<5) /*(?<name>.),\k<name>*/
-#define REG_SYN_OP2_SUBEXP_CALL (1<<6) /* \g<name> */
-#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<7) /* \C-x */
-#define REG_SYN_OP2_ESC_M_BAR_META (1<<8) /* \M-x */
-#define REG_SYN_OP2_ESC_V_VTAB (1<<9) /* \v as VTAB */
-#define REG_SYN_OP2_ESC_U_HEX4 (1<<10) /* \uHHHH */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<, \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
/* syntax (behavior) */
-#define REG_SYN_CONTEXT_INDEP_ANCHORS (1<<0) /* not implemented */
-#define REG_SYN_CONTEXT_INDEP_OPS (1<<1) /* ?, *, +, {n,m} */
-#define REG_SYN_CONTEXT_INVALID_OPS (1<<2) /* error or ignore */
-#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<3) /* ...)... */
-#define REG_SYN_ALLOW_INVALID_INTERVAL (1<<4) /* {??? */
-#define REG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ etc.*/
-#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
-
-/* syntax in char class [...] */
-#define REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED (1<<10) /* [,-,] */
-#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<11)
-#define REG_SYN_ESCAPE_IN_CC (1<<12) /* [...\w..] etc.. */
-#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<13)
-#define REG_SYN_ALLOW_RANGE_OP_IN_CC (1<<14) /* [0-9-a] */
-
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented; 1U because 1<<31 overflows a 32-bit int */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
+
+/* syntax (behavior) in char class [...] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */
+/* syntax (behavior) warning */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */
+
+/* meta character specifiers (onig_set_meta_char()) */
+#define ONIG_META_CHAR_ESCAPE 0
+#define ONIG_META_CHAR_ANYCHAR 1
+#define ONIG_META_CHAR_ANYTIME 2
+#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
+#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
+#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
+
+#define ONIG_INEFFECTIVE_META_CHAR 0
/* error codes */
-#define REG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -300)
+#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
/* normal return */
-#define REG_NORMAL 0
-#define REG_MISMATCH -1
-#define REG_NO_SUPPORT_CONFIG -2
+#define ONIG_NORMAL 0
+#define ONIG_MISMATCH -1
+#define ONIG_NO_SUPPORT_CONFIG -2
/* internal error */
-#define REGERR_MEMORY -5
-#define REGERR_MATCH_STACK_LIMIT_OVER -6
-#define REGERR_TYPE_BUG -10
-#define REGERR_PARSER_BUG -11
-#define REGERR_STACK_BUG -12
-#define REGERR_UNDEFINED_BYTECODE -13
-#define REGERR_UNEXPECTED_BYTECODE -14
-#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
-#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+#define ONIGERR_PARSER_BUG -11
+#define ONIGERR_STACK_BUG -12
+#define ONIGERR_UNDEFINED_BYTECODE -13
+#define ONIGERR_UNEXPECTED_BYTECODE -14
+#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
+#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+/* general error */
+#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */
-#define REGERR_END_PATTERN_AT_LEFT_BRACE -100
-#define REGERR_END_PATTERN_AT_LEFT_BRACKET -101
-#define REGERR_EMPTY_CHAR_CLASS -102
-#define REGERR_PREMATURE_END_OF_CHAR_CLASS -103
-#define REGERR_END_PATTERN_AT_BACKSLASH -104
-#define REGERR_END_PATTERN_AT_META -105
-#define REGERR_END_PATTERN_AT_CONTROL -106
-#define REGERR_META_CODE_SYNTAX -108
-#define REGERR_CONTROL_CODE_SYNTAX -109
-#define REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
-#define REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
-#define REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
-#define REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
-#define REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
-#define REGERR_NESTED_REPEAT_OPERATOR -115
-#define REGERR_UNMATCHED_CLOSE_PARENTHESIS -116
-#define REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
-#define REGERR_END_PATTERN_IN_GROUP -118
-#define REGERR_UNDEFINED_GROUP_OPTION -119
-#define REGERR_INVALID_POSIX_BRACKET_TYPE -121
-#define REGERR_INVALID_LOOK_BEHIND_PATTERN -122
-#define REGERR_INVALID_REPEAT_RANGE_PATTERN -123
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
+#define ONIGERR_EMPTY_CHAR_CLASS -102
+#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
+#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
+#define ONIGERR_END_PATTERN_AT_META -105
+#define ONIGERR_END_PATTERN_AT_CONTROL -106
+#define ONIGERR_META_CODE_SYNTAX -108
+#define ONIGERR_CONTROL_CODE_SYNTAX -109
+#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
+#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
+#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
+#define ONIGERR_NESTED_REPEAT_OPERATOR -115
+#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
+#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
+#define ONIGERR_END_PATTERN_IN_GROUP -118
+#define ONIGERR_UNDEFINED_GROUP_OPTION -119
+#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
+#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
+#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
/* values error (syntax error) */
-#define REGERR_TOO_BIG_NUMBER -200
-#define REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
-#define REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
-#define REGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
-#define REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
-#define REGERR_TOO_MANY_MULTI_BYTE_RANGES -205
-#define REGERR_TOO_SHORT_MULTI_BYTE_STRING -206
-#define REGERR_TOO_BIG_BACKREF_NUMBER -207
-#define REGERR_INVALID_BACKREF -208
-#define REGERR_TOO_BIG_WIDE_CHAR_VALUE -209
-#define REGERR_TOO_LONG_WIDE_CHAR_VALUE -210
-#define REGERR_INVALID_WIDE_CHAR_VALUE -211
-#define REGERR_INVALID_SUBEXP_NAME -212
-#define REGERR_UNDEFINED_NAME_REFERENCE -213
-#define REGERR_UNDEFINED_GROUP_REFERENCE -214
-#define REGERR_MULTIPLEX_DEFINITION_NAME_CALL -215
-#define REGERR_NEVER_ENDING_RECURSION -216
+#define ONIGERR_TOO_BIG_NUMBER -200
+#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
+#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
+#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
+#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
+#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
+#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
+#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
+#define ONIGERR_INVALID_BACKREF -208
+#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
+#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
+#define ONIGERR_EMPTY_GROUP_NAME -214
+#define ONIGERR_INVALID_GROUP_NAME -215
+#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
+#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
+#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
+#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
+#define ONIGERR_NEVER_ENDING_RECURSION -221
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
+#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
/* errors related to thread */
-#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT -301
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+
+/* must be smaller than BIT_STATUS_BITS_NUM (sizeof(unsigned int) * 8) */
+#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
+#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
+ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
/* match result region type */
struct re_registers {
@@ -284,164 +575,143 @@ struct re_registers {
int num_regs;
int* beg;
int* end;
+ /* extended */
+ struct re_registers** list; /* capture history. list[1]-list[31] */
};
-#define REG_REGION_NOTPOS -1
+#define ONIG_REGION_NOTPOS -1
-typedef struct re_registers RegRegion;
+typedef struct re_registers OnigRegion;
typedef struct {
UChar* par;
UChar* par_end;
-} RegErrorInfo;
+} OnigErrorInfo;
typedef struct {
int lower;
int upper;
-} RegRepeatRange;
+} OnigRepeatRange;
+
+typedef void (*OnigWarnFunc) P_((char* s));
+extern void onig_null_warn P_((char* s));
+#define ONIG_NULL_WARN onig_null_warn
+
+#define ONIG_CHAR_TABLE_SIZE 256
/* regex_t state */
-#define REG_STATE_NORMAL 0
-#define REG_STATE_SEARCHING 1
-#define REG_STATE_COMPILING -1
-#define REG_STATE_MODIFY -2
+#define ONIG_STATE_NORMAL 0
+#define ONIG_STATE_SEARCHING 1
+#define ONIG_STATE_COMPILING -1
+#define ONIG_STATE_MODIFY -2
-#define REG_STATE(regex) \
- ((regex)->state > 0 ? REG_STATE_SEARCHING : (regex)->state)
+#define ONIG_STATE(reg) \
+ ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
typedef struct re_pattern_buffer {
- /* common members in BBuf(bytes-buffer) type */
+ /* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
unsigned int used; /* used space for p */
unsigned int alloc; /* allocated space for p */
- int state; /* normal, searching, compiling */
- int num_mem; /* used memory(...) num counted from 1 */
- int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_NULL_CHECK_START/END id counter */
- int num_call; /* number of subexp call */
- unsigned int backtrack_mem;
+ int state; /* normal, searching, compiling */
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc;
- RegRepeatRange* repeat_range;
+ OnigRepeatRange* repeat_range;
- RegCharEncoding enc;
- RegOptionType options;
- RegSyntaxType* syntax;
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
void* name_table;
- /* optimize info (string search and char-map and anchor) */
+ /* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- RegDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- RegDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
- unsigned char map[REG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
- RegDistance dmin; /* min-distance of exact or map */
- RegDistance dmax; /* max-distance of exact or map */
+ OnigDistance dmin; /* min-distance of exact or map */
+ OnigDistance dmax; /* max-distance of exact or map */
/* regex_t link chain */
- struct re_pattern_buffer* chain; /* escape compile-conflict on multi-thread */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
} regex_t;
-#ifdef RUBY_PLATFORM
-#define re_mbcinit ruby_re_mbcinit
-#define re_compile_pattern ruby_re_compile_pattern
-#define re_recompile_pattern ruby_re_recompile_pattern
-#define re_free_pattern ruby_re_free_pattern
-#define re_adjust_startpos ruby_re_adjust_startpos
-#define re_search ruby_re_search
-#define re_match ruby_re_match
-#define re_set_casetable ruby_re_set_casetable
-#define re_copy_registers ruby_re_copy_registers
-#define re_free_registers ruby_re_free_registers
-#define register_info_type ruby_register_info_type
-#define re_error_code_to_str ruby_error_code_to_str
-
-#define ruby_error_code_to_str regex_error_code_to_str
-#define ruby_re_copy_registers regex_region_copy
-#else
-#define re_error_code_to_str regex_error_code_to_str
-#define re_copy_registers regex_region_copy
-#endif
/* Oniguruma Native API */
-REG_EXTERN
-int regex_init P_((void));
-REG_EXTERN
-int regex_error_code_to_str PV_((UChar* s, int err_code, ...));
-REG_EXTERN
-int regex_new P_((regex_t**, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo));
-REG_EXTERN
-void regex_free P_((regex_t*));
-REG_EXTERN
-int regex_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo));
-REG_EXTERN
-int regex_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, RegRegion* region, RegOptionType option));
-REG_EXTERN
-int regex_match P_((regex_t*, UChar* str, UChar* end, UChar* at, RegRegion* region, RegOptionType option));
-REG_EXTERN
-RegRegion* regex_region_new P_((void));
-REG_EXTERN
-void regex_region_free P_((RegRegion* region, int free_self));
-REG_EXTERN
-void regex_region_copy P_((RegRegion* to, RegRegion* from));
-REG_EXTERN
-void regex_region_clear P_((RegRegion* region));
-REG_EXTERN
-int regex_region_resize P_((RegRegion* region, int n));
-REG_EXTERN
-int regex_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
+ONIG_EXTERN
+int onig_init P_((void));
+ONIG_EXTERN
+int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
+ONIG_EXTERN
+void onig_set_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+void onig_set_verb_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+void onig_free P_((regex_t*));
+ONIG_EXTERN
+int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+OnigRegion* onig_region_new P_((void));
+ONIG_EXTERN
+void onig_region_free P_((OnigRegion* region, int free_self));
+ONIG_EXTERN
+void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
+ONIG_EXTERN
+void onig_region_clear P_((OnigRegion* region));
+ONIG_EXTERN
+int onig_region_resize P_((OnigRegion* region, int n));
+ONIG_EXTERN
+int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
int** nums));
-REG_EXTERN
-int regex_foreach_name P_((regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg));
-REG_EXTERN
-UChar* regex_get_prev_char_head P_((RegCharEncoding code, UChar* start, UChar* s));
-REG_EXTERN
-UChar* regex_get_left_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s));
-REG_EXTERN
-UChar* regex_get_right_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s));
-REG_EXTERN
-void regex_set_default_trans_table P_((UChar* table));
-REG_EXTERN
-int regex_set_default_syntax P_((RegSyntaxType* syntax));
-REG_EXTERN
-int regex_end P_((void));
-REG_EXTERN
-const char* regex_version P_((void));
-
-
-/* GNU regex API */
-#ifdef REG_RUBY_M17N
-REG_EXTERN
-void re_mbcinit P_((RegCharEncoding));
-#else
-REG_EXTERN
-void re_mbcinit P_((int));
-#endif
-
-REG_EXTERN
-int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
-REG_EXTERN
-int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
-REG_EXTERN
-void re_free_pattern P_((struct re_pattern_buffer*));
-REG_EXTERN
-int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
-REG_EXTERN
-int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
-REG_EXTERN
-int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
-REG_EXTERN
-void re_set_casetable P_((const char*));
-REG_EXTERN
-void re_free_registers P_((struct re_registers*));
-REG_EXTERN
-int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
+ONIG_EXTERN
+int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
+ONIG_EXTERN
+int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
+ONIG_EXTERN
+int onig_number_of_names P_((regex_t* reg));
+ONIG_EXTERN
+OnigEncoding onig_get_encoding P_((regex_t* reg));
+ONIG_EXTERN
+OnigOptionType onig_get_options P_((regex_t* reg));
+ONIG_EXTERN
+OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
+ONIG_EXTERN
+int onig_set_default_syntax P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
+ONIG_EXTERN
+void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
+ONIG_EXTERN
+void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
+ONIG_EXTERN
+void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
+ONIG_EXTERN
+void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
+ONIG_EXTERN
+int onig_set_meta_char P_((unsigned int what, unsigned int c));
+ONIG_EXTERN
+int onig_end P_((void));
+ONIG_EXTERN
+const char* onig_version P_((void));
#endif /* ONIGURUMA_H */
diff --git a/ext/mbstring/oniguruma/php_compat.h b/ext/mbstring/oniguruma/php_compat.h
index c261cc2bb0..d7f1ad56f1 100644
--- a/ext/mbstring/oniguruma/php_compat.h
+++ b/ext/mbstring/oniguruma/php_compat.h
@@ -1,46 +1,7 @@
#ifndef _PHP_MBREGEX_COMPAT_H
#define _PHP_MBREGEX_COMPAT_H
-#define RegCharEncoding php_mb_reg_char_encoding
-#define RegRegion php_mb_reg_region
-#define RegDefaultCharEncoding php_mb_reg_default_char_encoding
-#define REG_MBLEN_TABLE PHP_MBSTR_REG_MBLEN_TABLE
-#define RegSyntaxType php_mb_reg_syntax_type
-#define RegOptionType php_mb_reg_option_type
-#define re_registers php_mb_re_registers
-#define RegErrorInfo php_mb_reg_error_info
#define re_pattern_buffer php_mb_re_pattern_buffer
#define regex_t php_mb_regex_t
-#define regex_init php_mb_regex_init
-#define regex_new php_mb_regex_new
-#define regex_free php_mb_regex_free
-#define regex_recompile php_mb_regex_recompile
-#define regex_search php_mb_regex_search
-#define regex_match php_mb_regex_match
-#define regex_region_new php_mb_regex_region_new
-#define regex_region_free php_mb_regex_region_free
-#define regex_region_copy php_mb_regex_region_copy
-#define regex_region_clear php_mb_regex_region_clear
-#define regex_region_resize php_mb_regex_region_resize
-#define regex_name_to_group_numbers php_mb_regex_name_to_group_numbers
-#define regex_foreach_names php_mb_regex_foreach_names
-#define regex_get_prev_char_head php_mb_regex_get_prev_char_head
-#define regex_get_left_adjust_char_head php_mb_get_left_adjust_char_head
-#define regex_get_right_adjust_char_head php_mb_get_right_adjust_char_head
-#define regex_set_default_trans_table php_mb_get_default_trans_table
-#define regex_set_default_syntax php_mb_regex_set_default_syntax
-#define regex_end php_mb_regex_end
-#define re_mbcinit php_mb_re_mbcinit
-#define re_compile_pattern php_mb_re_compile_pattern
-#define re_recompile_pattern php_mb_re_recompile_pattern
-#define re_free_pattern php_mb_re_free_pattern
-#define re_adjust_startpos php_mb_re_adjust_startpos
-#define re_search php_mb_re_search
-#define re_match php_mb_re_match
-#define re_set_casetable php_mb_re_set_casetable
-#define php_mbregex_region_copy php_mb_re_copy_registers
-#define re_free_registers php_mb_re_free_registers
-#define register_info_type php_mb_register_info_type
-#define regex_error_code_to_str php_mb_regex_error_code_to_str
#endif /* _PHP_MBREGEX_COMPAT_H */
diff --git a/ext/mbstring/oniguruma/re.c.168.patch b/ext/mbstring/oniguruma/re.c.168.patch
deleted file mode 100644
index fd1c1bf5d8..0000000000
--- a/ext/mbstring/oniguruma/re.c.168.patch
+++ /dev/null
@@ -1,56 +0,0 @@
---- re.c.ruby_orig Tue Feb 4 15:52:29 2003
-+++ re.c Tue Mar 18 19:37:49 2003
-@@ -380,7 +380,8 @@ make_regexp(s, len, flag)
- int len, flag;
- {
- Regexp *rp;
-- char *err;
-+ char err[REG_MAX_ERROR_MESSAGE_LEN];
-+ int r;
-
- /* Handle escaped characters first. */
-
-@@ -389,16 +390,17 @@ make_regexp(s, len, flag)
- from that.
- */
-
-- rp = ALLOC(Regexp);
-- MEMZERO((char *)rp, Regexp, 1);
-- rp->buffer = ALLOC_N(char, 16);
-- rp->allocated = 16;
-- rp->fastmap = ALLOC_N(char, 256);
-+ r = re_alloc_pattern(&rp);
-+ if (r) {
-+ re_error_code_to_str(err, r);
-+ rb_reg_raise(s, len, err, 0);
-+ }
-+
- if (flag) {
- rp->options = flag;
- }
-- err = re_compile_pattern(s, len, rp);
-- if (err != NULL) {
-+ r = re_compile_pattern(s, len, rp, err);
-+ if (r != 0) {
- rb_reg_raise(s, len, err, 0);
- }
-
-@@ -532,14 +534,14 @@ rb_reg_prepare_re(re)
- }
-
- if (need_recompile) {
-- char *err;
-+ char err[REG_MAX_ERROR_MESSAGE_LEN];
-+ int r;
-
- if (FL_TEST(re, KCODE_FIXED))
- kcode_set_option(re);
- rb_reg_check(re);
-- RREGEXP(re)->ptr->fastmap_accurate = 0;
-- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
-- if (err != NULL) {
-+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
-+ if (r != 0) {
- rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
- }
- }
diff --git a/ext/mbstring/oniguruma/re.c.180.patch b/ext/mbstring/oniguruma/re.c.180.patch
deleted file mode 100644
index 08ef2397fb..0000000000
--- a/ext/mbstring/oniguruma/re.c.180.patch
+++ /dev/null
@@ -1,66 +0,0 @@
---- re.c.ruby_orig Fri Feb 7 15:35:26 2003
-+++ re.c Tue Mar 18 18:51:21 2003
-@@ -444,7 +444,7 @@ rb_reg_to_s(re)
- kcode_set_option(re);
- rp = ALLOC(Regexp);
- MEMZERO((char *)rp, Regexp, 1);
-- err = re_compile_pattern(++ptr, len -= 2, rp) != 0;
-+ err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0);
- kcode_reset_option();
- re_free_pattern(rp);
- }
-@@ -538,7 +538,8 @@ make_regexp(s, len, flags)
- int flags;
- {
- Regexp *rp;
-- char *err;
-+ char err[REG_MAX_ERROR_MESSAGE_LEN];
-+ int r;
-
- /* Handle escaped characters first. */
-
-@@ -547,17 +548,18 @@ make_regexp(s, len, flags)
- from that.
- */
-
-- rp = ALLOC(Regexp);
-- MEMZERO((char *)rp, Regexp, 1);
-- rp->buffer = ALLOC_N(char, 16);
-- rp->allocated = 16;
-- rp->fastmap = ALLOC_N(char, 256);
-+ r = re_alloc_pattern(&rp);
-+ if (r) {
-+ re_error_code_to_str((UChar* )err, r);
-+ rb_reg_raise(s, len, err, 0);
-+ }
-+
- if (flags) {
- rp->options = flags;
- }
-- err = re_compile_pattern(s, len, rp);
-+ r = re_compile_pattern(s, len, rp, err);
-
-- if (err != NULL) {
-+ if (r != 0) {
- rb_reg_raise(s, len, err, 0);
- }
- return rp;
-@@ -692,14 +694,14 @@ rb_reg_prepare_re(re)
- }
-
- if (need_recompile) {
-- char *err;
-+ char err[REG_MAX_ERROR_MESSAGE_LEN];
-+ int r;
-
- if (FL_TEST(re, KCODE_FIXED))
- kcode_set_option(re);
- rb_reg_check(re);
-- RREGEXP(re)->ptr->fastmap_accurate = 0;
-- err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
-- if (err != NULL) {
-+ r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
-+ if (r != 0) {
- rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
- }
- }
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
index fd8e56a7a7..9a89b92ecb 100644
--- a/ext/mbstring/oniguruma/regcomp.c
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -2,12 +2,12 @@
regcomp.c - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regparse.h"
-#ifndef UNALIGNED_WORD_ACCESS
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
@@ -18,83 +18,28 @@ swap_node(Node* a, Node* b)
c = *a; *a = *b; *b = c;
}
-static RegDistance
-distance_add(RegDistance d1, RegDistance d2)
+static OnigDistance
+distance_add(OnigDistance d1, OnigDistance d2)
{
- if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE)
- return INFINITE_DISTANCE;
+ if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
+ return ONIG_INFINITE_DISTANCE;
else {
- if (d1 <= INFINITE_DISTANCE - d2) return d1 + d2;
- else return INFINITE_DISTANCE;
+ if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
+ else return ONIG_INFINITE_DISTANCE;
}
}
-static RegDistance
-distance_multiply(RegDistance d, int m)
+static OnigDistance
+distance_multiply(OnigDistance d, int m)
{
if (m == 0) return 0;
- if (d < INFINITE_DISTANCE / m)
+ if (d < ONIG_INFINITE_DISTANCE / m)
return d * m;
else
- return INFINITE_DISTANCE;
+ return ONIG_INFINITE_DISTANCE;
}
-#if 0
-static RegDistance
-distance_distance(RegDistance d1, RegDistance d2)
-{
- if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE)
- return INFINITE_DISTANCE;
-
- if (d1 > d2) return d1 - d2;
- else return d2 - d1;
-}
-#endif
-
-RegCharEncoding RegDefaultCharEncoding = REGCODE_DEFAULT;
-static UChar AmbiguityTable[REG_CHAR_TABLE_SIZE];
-
-#define IS_AMBIGUITY_CHAR(enc, c) (AmbiguityTable[(c)] >= 2)
-
-#ifdef DEFAULT_TRANSTABLE_EXIST
-
-static UChar DTT[] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-#endif
-
static int
bitset_is_empty(BitSetRef bs)
{
@@ -105,7 +50,7 @@ bitset_is_empty(BitSetRef bs)
return 1;
}
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
static int
bitset_on_num(BitSetRef bs)
{
@@ -120,10 +65,10 @@ bitset_on_num(BitSetRef bs)
#endif
extern int
-regex_bbuf_init(BBuf* buf, int size)
+onig_bbuf_init(BBuf* buf, int size)
{
buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(REGERR_MEMORY);
+ if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
buf->alloc = size;
buf->used = 0;
@@ -139,7 +84,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size)
UnsetAddr* p;
p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
uslist->num = 0;
uslist->alloc = size;
uslist->us = p;
@@ -162,7 +107,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
if (uslist->num >= uslist->alloc) {
size = uslist->alloc * 2;
p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
uslist->alloc = size;
uslist->us = p;
}
@@ -175,122 +120,9 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
#endif /* USE_SUBEXP_CALL */
-#ifdef REG_RUBY_M17N
-
-extern int
-regex_is_allow_reverse_match(RegCharEncoding enc, UChar* s, UChar* end)
-{
- return IS_INDEPENDENT_TRAIL(enc);
-}
-
-#else /* REG_RUBY_M17N */
-
-const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE] = {
- { /* ascii */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
- { /* euc-jp */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
- },
- { /* sjis */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- },
- { /* utf8 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
- }
-};
-
-extern int
-regex_mb_max_length(RegCharEncoding code)
-{
- /* can't use switch statement, code isn't int type. */
- if (code == REGCODE_ASCII) return 1;
- else if (code == REGCODE_EUCJP) return 3;
- else if (code == REGCODE_SJIS) return 2;
- else return 6; /* REGCODE_UTF8 */
-}
-
-extern int
-regex_is_allow_reverse_match(RegCharEncoding enc, UChar* s, UChar* end)
-{
- UChar c;
-
- if (IS_INDEPENDENT_TRAIL(enc)) return 1;
-
- c = *s;
- if (enc == REGCODE_EUCJP) {
- if (c <= 0x7e || c == 0x8e || c == 0x8f) return 1;
- }
- else if (enc == REGCODE_SJIS) {
- if (c <= 0x3f || c == 0x7f) return 1;
- }
- return 0;
-}
-
-#endif /* not REG_RUBY_M17N */
-
+#if 0
static int
-bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc)
+bitset_mbmaxlen(BitSetRef bs, int negative, OnigEncoding enc)
{
int i;
int len, maxlen = 0;
@@ -298,7 +130,7 @@ bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc)
if (negative) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! BITSET_AT(bs, i)) {
- len = mblen(enc, i);
+ len = enc_len(enc, i);
if (len > maxlen) maxlen = len;
}
}
@@ -306,14 +138,14 @@ bitset_mbmaxlen(BitSetRef bs, int negative, RegCharEncoding enc)
else {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(bs, i)) {
- len = mblen(enc, i);
+ len = enc_len(enc, i);
if (len > maxlen) maxlen = len;
}
}
}
return maxlen;
}
-
+#endif
static int
add_opcode(regex_t* reg, int opcode)
@@ -327,7 +159,7 @@ add_rel_addr(regex_t* reg, int addr)
{
RelAddrType ra = (RelAddrType )addr;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_RELADDR);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -342,7 +174,7 @@ add_abs_addr(regex_t* reg, int addr)
{
AbsAddrType ra = (AbsAddrType )addr;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_ABSADDR);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -357,7 +189,7 @@ add_length(regex_t* reg, int len)
{
LengthType l = (LengthType )len;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &l, SIZE_LENGTH);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -372,7 +204,7 @@ add_mem_num(regex_t* reg, int num)
{
MemNumType n = (MemNumType )num;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &n, SIZE_MEMNUM);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -388,7 +220,7 @@ add_repeat_num(regex_t* reg, int num)
{
RepeatNumType n = (RepeatNumType )num;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &n, SIZE_REPEATNUM);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -400,9 +232,9 @@ add_repeat_num(regex_t* reg, int num)
#endif
static int
-add_option(regex_t* reg, RegOptionType option)
+add_option(regex_t* reg, OnigOptionType option)
{
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &option, SIZE_OPTION);
#else
UChar buf[SERIALIZE_BUFSIZE];
@@ -438,7 +270,7 @@ add_bitset(regex_t* reg, BitSetRef bs)
}
static int
-add_opcode_option(regex_t* reg, int opcode, RegOptionType option)
+add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
{
int r;
@@ -502,12 +334,12 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_check)
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
{
int r;
int saved_num_null_check = reg->num_null_check;
- if (empty_check) {
+ if (empty_info != 0) {
r = add_opcode(reg, OP_NULL_CHECK_START);
if (r) return r;
r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
@@ -518,8 +350,14 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_check)
r = compile_tree(node, reg);
if (r) return r;
- if (empty_check) {
- r = add_opcode(reg, OP_NULL_CHECK_END);
+ if (empty_info != 0) {
+ if (empty_info == NQ_TARGET_IS_EMPTY)
+ r = add_opcode(reg, OP_NULL_CHECK_END);
+ else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
+ r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
+ else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
+ r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+
if (r) return r;
r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
}
@@ -594,7 +432,7 @@ static int
compile_length_string_node(StrNode* sn, regex_t* reg)
{
int rlen, r, len, prev_len, slen, ambig, ic;
- RegCharEncoding code = reg->enc;
+ OnigEncoding enc = reg->enc;
UChar *p, *prev;
if (sn->end <= sn->s)
@@ -603,9 +441,9 @@ compile_length_string_node(StrNode* sn, regex_t* reg)
ic = IS_IGNORECASE(reg->options);
p = prev = sn->s;
- prev_len = mblen(code, *p);
+ prev_len = enc_len(enc, *p);
if (ic != 0 && prev_len == 1)
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
else
ambig = 0;
@@ -614,18 +452,18 @@ compile_length_string_node(StrNode* sn, regex_t* reg)
rlen = 0;
for (; p < sn->end; ) {
- len = mblen(code, *p);
+ len = enc_len(enc, *p);
if (len == prev_len) {
slen++;
if (ic != 0 && ambig == 0 && len == 1)
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
}
else {
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
rlen += r;
if (ic != 0 && len == 1)
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
else
ambig = 0;
@@ -654,7 +492,7 @@ static int
compile_string_node(StrNode* sn, regex_t* reg)
{
int r, len, prev_len, slen, ambig, ic;
- RegCharEncoding code = reg->enc;
+ OnigEncoding enc = reg->enc;
UChar *p, *prev;
if (sn->end <= sn->s)
@@ -663,10 +501,11 @@ compile_string_node(StrNode* sn, regex_t* reg)
ic = IS_IGNORECASE(reg->options);
p = prev = sn->s;
- prev_len = mblen(code, *p);
+ prev_len = enc_len(enc, *p);
if (ic != 0 && prev_len == 1) {
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
- if (ambig != 0) *p = TOLOWER(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0)
+ ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
}
else
ambig = 0;
@@ -675,21 +514,21 @@ compile_string_node(StrNode* sn, regex_t* reg)
slen = 1;
for (; p < sn->end; ) {
- len = mblen(code, *p);
+ len = enc_len(enc, *p);
if (len == prev_len) {
slen++;
if (ic != 0 && len == 1) {
if (ambig == 0)
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
- if (ambig != 0) *p = TOLOWER(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
}
}
else {
r = add_compile_string(prev, prev_len, slen, reg, ambig);
if (r) return r;
if (ic != 0 && len == 1) {
- ambig = IS_AMBIGUITY_CHAR(reg->enc, *p);
- if (ambig != 0) *p = TOLOWER(reg->enc, *p);
+ ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
+ if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
}
else
ambig = 0;
@@ -714,20 +553,20 @@ compile_string_raw_node(StrNode* sn, regex_t* reg)
}
static int
-add_multi_byte_cclass_offset(BBuf* mbuf, regex_t* reg, int offset)
+add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
{
-#ifdef UNALIGNED_WORD_ACCESS
- add_length(reg, mbuf->used - offset);
- return add_bytes(reg, mbuf->p + offset, mbuf->used - offset);
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ add_length(reg, mbuf->used);
+ return add_bytes(reg, mbuf->p, mbuf->used);
#else
int r, pad_size;
UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
GET_ALIGNMENT_PAD_SIZE(p, pad_size);
- add_length(reg, mbuf->used - offset + (WORD_ALIGNMENT_SIZE - 1));
+ add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
- r = add_bytes(reg, mbuf->p + offset, mbuf->used - offset);
+ r = add_bytes(reg, mbuf->p, mbuf->used);
/* padding for return value from compile_length_cclass_node() to be fix. */
pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
@@ -747,12 +586,12 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
else {
if (bitset_is_empty(cc->bs)) {
/* SIZE_BITSET is included in mbuf->used. */
- len = SIZE_OPCODE - SIZE_BITSET;
+ len = SIZE_OPCODE;
}
else {
- len = SIZE_OPCODE;
+ len = SIZE_OPCODE + SIZE_BITSET;
}
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
len += SIZE_LENGTH + cc->mbuf->used;
#else
len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
@@ -778,7 +617,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
else add_opcode(reg, OP_CCLASS_MB);
- r = add_multi_byte_cclass_offset(cc->mbuf, reg, SIZE_BITSET);
+ r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
@@ -786,7 +625,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
r = add_bitset(reg, cc->bs);
if (r) return r;
- r = add_multi_byte_cclass_offset(cc->mbuf, reg, SIZE_BITSET);
+ r = add_multi_byte_cclass(cc->mbuf, reg);
}
}
@@ -798,20 +637,20 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
{
#define REPEAT_RANGE_ALLOC 4
- RegRepeatRange* p;
+ OnigRepeatRange* p;
if (reg->repeat_range_alloc == 0) {
- p = (RegRepeatRange* )xmalloc(sizeof(RegRepeatRange) * REPEAT_RANGE_ALLOC);
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
reg->repeat_range = p;
reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
}
else if (reg->repeat_range_alloc <= id) {
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (RegRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(RegRepeatRange) * n);
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
+ sizeof(OnigRepeatRange) * n);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
}
@@ -825,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
static int
-compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_check,
+compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
regex_t* reg)
{
int r;
@@ -842,7 +681,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_check,
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_check);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
@@ -858,7 +697,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_check = (infinite && qn->target_may_empty);
+ int empty_info = qn->target_empty_info;
int tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
@@ -873,7 +712,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
}
- if (empty_check)
+ if (empty_info != 0)
mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
else
mod_tlen = tlen;
@@ -932,7 +771,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_check = (infinite && qn->target_may_empty);
+ int empty_info = qn->target_empty_info;
int tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
@@ -941,15 +780,22 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ else
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
if (r) return r;
return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
}
- else
- return add_opcode(reg, OP_ANYCHAR_STAR);
+ else {
+ if (IS_MULTILINE(reg->options))
+ return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+ else
+ return add_opcode(reg, OP_ANYCHAR_STAR);
+ }
}
- if (empty_check)
+ if (empty_info != 0)
mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
else
mod_tlen = tlen;
@@ -981,7 +827,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen + SIZE_OP_JUMP);
if (r) return r;
add_bytes(reg, NSTRING(qn->head_exact).s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_check);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH_OR_JUMP_EXACT1));
@@ -991,7 +837,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
mod_tlen + SIZE_OP_JUMP);
if (r) return r;
add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_check);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH_IF_PEEK_NEXT));
@@ -999,7 +845,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
else {
r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_check);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + SIZE_OP_JUMP + SIZE_OP_PUSH));
@@ -1008,7 +854,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
else {
r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_check);
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + SIZE_OP_PUSH));
}
@@ -1041,7 +887,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
r = compile_tree(qn->target, reg);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_check, reg);
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
}
return r;
}
@@ -1050,7 +896,7 @@ static int
compile_length_option_node(EffectNode* node, regex_t* reg)
{
int tlen;
- RegOptionType prev = reg->options;
+ OnigOptionType prev = reg->options;
reg->options = node->option;
tlen = compile_length_tree(node->target, reg);
@@ -1058,29 +904,39 @@ compile_length_option_node(EffectNode* node, regex_t* reg)
if (tlen < 0) return tlen;
- return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
- + tlen + SIZE_OP_SET_OPTION;
+ if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
+ return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
+ + tlen + SIZE_OP_SET_OPTION;
+ }
+ else
+ return tlen;
}
+/* Emit bytecode for an option-changing group.
+ * When the bits that differ between the current options and node->option
+ * satisfy IS_DYNAMIC_OPTION, the target is wrapped in
+ * OP_SET_OPTION_PUSH / OP_SET_OPTION / OP_FAIL ... OP_SET_OPTION; otherwise
+ * only the target is compiled.  In both cases reg->options is switched to
+ * node->option while the target is compiled and restored afterwards.
+ * compile_length_option_node() sizes the output with the same
+ * IS_DYNAMIC_OPTION test.
+ * Returns 0 on success or the first non-zero code from a callee. */
static int
compile_option_node(EffectNode* node, regex_t* reg)
{
int r;
- RegOptionType prev = reg->options;
-
- r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
- if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL);
- if (r) return r;
+ OnigOptionType prev = reg->options;
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
- if (r) return r;
+ if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
+ r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
+ if (r) return r;
+ r = add_opcode_option(reg, OP_SET_OPTION, prev);
+ if (r) return r;
+ r = add_opcode(reg, OP_FAIL);
+ if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ reg->options = prev;
+ if (r) return r;
+ r = add_opcode_option(reg, OP_SET_OPTION, prev);
+ }
+ else {
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ reg->options = prev;
+ }
return r;
}
@@ -1106,7 +962,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
if (IS_EFFECT_CALLED(node)) {
len = SIZE_OP_MEMORY_START_PUSH + tlen
+ SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (IS_FIND_CONDITION(reg->options))
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
len += (IS_EFFECT_RECURSION(node)
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
@@ -1116,12 +972,12 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
else
#endif
{
- if (BIT_STATUS_AT(reg->backtrack_mem, node->regnum))
+ if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
len = SIZE_OP_MEMORY_START_PUSH;
else
len = SIZE_OP_MEMORY_START;
- len += tlen + (IS_FIND_CONDITION(reg->options)
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
}
break;
@@ -1141,7 +997,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
break;
default:
- return REGERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
break;
}
@@ -1170,7 +1026,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
if (r) return r;
len = compile_length_tree(node->target, reg);
len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (IS_FIND_CONDITION(reg->options))
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
len += (IS_EFFECT_RECURSION(node)
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
@@ -1181,7 +1037,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
if (r) return r;
}
#endif
- if (BIT_STATUS_AT(reg->backtrack_mem, node->regnum))
+ if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
r = add_opcode(reg, OP_MEMORY_START_PUSH);
else
r = add_opcode(reg, OP_MEMORY_START);
@@ -1192,7 +1048,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
if (r) return r;
#ifdef USE_SUBEXP_CALL
if (IS_EFFECT_CALLED(node)) {
- if (IS_FIND_CONDITION(reg->options))
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
else
@@ -1207,7 +1063,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
else
#endif
{
- if (IS_FIND_CONDITION(reg->options))
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
r = add_opcode(reg, OP_MEMORY_END_PUSH);
else
r = add_opcode(reg, OP_MEMORY_END);
@@ -1244,7 +1100,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
break;
default:
- return REGERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
break;
}
@@ -1329,7 +1185,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
if (r) return r;
if (node->char_len < 0) {
r = get_char_length_tree(node->target, reg, &n);
- if (r) return REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
n = node->char_len;
@@ -1348,7 +1204,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
if (r) return r;
if (node->char_len < 0) {
r = get_char_length_tree(node->target, reg, &n);
- if (r) return REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
n = node->char_len;
@@ -1361,7 +1217,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
break;
default:
- return REGERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
break;
}
@@ -1419,7 +1275,7 @@ compile_length_tree(Node* node, regex_t* reg)
BackrefNode* br = &(NBACKREF(node));
if (br->back_num == 1) {
- r = (br->back_static[0] <= 3
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
@@ -1447,7 +1303,7 @@ compile_length_tree(Node* node, regex_t* reg)
break;
default:
- return REGERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
break;
}
@@ -1514,12 +1370,8 @@ compile_tree(Node* node, regex_t* reg)
switch (NCTYPE(node).type) {
case CTYPE_WORD: op = OP_WORD; break;
case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB: op = OP_WORD_SB; break;
- case CTYPE_WORD_MB: op = OP_WORD_MB; break;
-#endif
default:
- return REGERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
break;
}
r = add_opcode(reg, op);
@@ -1527,7 +1379,10 @@ compile_tree(Node* node, regex_t* reg)
break;
case N_ANYCHAR:
- r = add_opcode(reg, OP_ANYCHAR);
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML);
+ else
+ r = add_opcode(reg, OP_ANYCHAR);
break;
case N_BACKREF:
@@ -1537,20 +1392,28 @@ compile_tree(Node* node, regex_t* reg)
if (br->back_num == 1) {
n = br->back_static[0];
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- case 3: r = add_opcode(reg, OP_BACKREF3); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREFN_IC);
if (r) return r;
r = add_mem_num(reg, n);
- break;
+ }
+ else {
+ switch (n) {
+ case 1: r = add_opcode(reg, OP_BACKREF1); break;
+ case 2: r = add_opcode(reg, OP_BACKREF2); break;
+ case 3: r = add_opcode(reg, OP_BACKREF3); break;
+ default:
+ r = add_opcode(reg, OP_BACKREFN);
+ if (r) return r;
+ r = add_mem_num(reg, n);
+ break;
+ }
}
}
else {
int* p;
- add_opcode(reg, OP_BACKREF_MULTI);
+ add_opcode(reg, (IS_IGNORECASE(reg->options) ?
+ OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI));
if (r) return r;
add_length(reg, br->back_num);
if (r) return r;
@@ -1582,7 +1445,7 @@ compile_tree(Node* node, regex_t* reg)
break;
default:
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
#endif
break;
@@ -1591,6 +1454,194 @@ compile_tree(Node* node, regex_t* reg)
return r;
}
+#ifdef USE_NAMED_GROUP
+/* Renumbering table entry, indexed by a group's old number.
+ * new_val is the group's number after renumbering.
+ * disable_noname_group_capture() initialises entries 1..num_mem to 0, and
+ * renumber_node_backref() drops references whose entry is not > 0. */
+typedef struct {
+ int new_val;
+} NumMap;
+
+/* Walk the tree at *plink, removing unnamed EFFECT_MEMORY nodes and
+ * renumbering named ones.
+ *   plink   - address of the link holding the node; may be rewritten when
+ *             an unnamed group is unlinked.
+ *   map     - table indexed by old group number; receives the new number
+ *             of every named group encountered.
+ *   counter - running count of named groups seen so far; incremented
+ *             before each assignment, so new numbers start at 1.
+ * Returns the result of the last recursive call.  No branch visible here
+ * sets a non-zero value directly. */
+static int
+noname_disable_map(Node** plink, NumMap* map, int* counter)
+{
+ int r = 0;
+ Node* node = *plink;
+
+ switch (NTYPE(node)) {
+ case N_LIST:
+ case N_ALT:
+ /* Visit each left child in turn; stop at the first non-zero result. */
+ do {
+ r = noname_disable_map(&(NCONS(node).left), map, counter);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_QUALIFIER:
+ {
+ Node** ptarget = &(NQUALIFIER(node).target);
+ Node* old = *ptarget;
+ r = noname_disable_map(ptarget, map, counter);
+ /* If the target link was rewritten and now points at another
+  * qualifier, hand both to onig_reduce_nested_qualifier(). */
+ if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) {
+ onig_reduce_nested_qualifier(node, *ptarget);
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ if (en->type == EFFECT_MEMORY) {
+ if (IS_EFFECT_NAMED_GROUP(en)) {
+ /* Named group: assign the next number and record old -> new. */
+ (*counter)++;
+ map[en->regnum].new_val = *counter;
+ en->regnum = *counter;
+ r = noname_disable_map(&(en->target), map, counter);
+ }
+ else {
+ /* Unnamed group: splice its target into the parent's link, clear
+  * en->target before freeing the node (presumably so
+  * onig_node_free() does not also release the target - confirm),
+  * then continue from the spliced-in node. */
+ *plink = en->target;
+ en->target = NULL_NODE;
+ onig_node_free(node);
+ r = noname_disable_map(plink, map, counter);
+ }
+ }
+ else
+ r = noname_disable_map(&(en->target), map, counter);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Rewrite the group numbers held by a back-reference node through map.
+ * Returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED when the node is not
+ * a name reference; otherwise returns 0 after updating the node in place.
+ * Entries whose mapped value is not > 0 are dropped and the remaining ones
+ * are compacted to the front; back_num is set to the number kept. */
+static int
+renumber_node_backref(Node* node, NumMap* map)
+{
+ int i, pos, n, old_num;
+ int *backs;
+ BackrefNode* bn = &(NBACKREF(node));
+
+ if (! IS_BACKREF_NAME_REF(bn))
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+
+ old_num = bn->back_num;
+ /* Use the inline array unless a dynamic one is present. */
+ if (IS_NULL(bn->back_dynamic))
+ backs = bn->back_static;
+ else
+ backs = bn->back_dynamic;
+
+ /* pos <= i always holds, so writing backs[pos] never clobbers an
+  * entry that has not been read yet. */
+ for (i = 0, pos = 0; i < old_num; i++) {
+ n = map[backs[i]].new_val;
+ if (n > 0) {
+ backs[pos] = n;
+ pos++;
+ }
+ }
+
+ bn->back_num = pos;
+ return 0;
+}
+
+/* Walk the tree and apply renumber_node_backref() to every N_BACKREF node.
+ * Descends through list/alt children, qualifier targets and effect targets;
+ * other node types are left untouched.
+ * Returns 0, or the first non-zero result, which ends a list/alt walk. */
+static int
+renumber_by_map(Node* node, NumMap* map)
+{
+ int r = 0;
+
+ switch (NTYPE(node)) {
+ case N_LIST:
+ case N_ALT:
+ do {
+ r = renumber_by_map(NCONS(node).left, map);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+ case N_QUALIFIER:
+ r = renumber_by_map(NQUALIFIER(node).target, map);
+ break;
+ case N_EFFECT:
+ r = renumber_by_map(NEFFECT(node).target, map);
+ break;
+
+ case N_BACKREF:
+ r = renumber_node_backref(node, map);
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Check that the tree contains no back-reference other than name references.
+ * Descends through list/alt children, qualifier targets and effect targets.
+ * Returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED for the first
+ * N_BACKREF node that is not a name reference, 0 otherwise. */
+static int
+numbered_ref_check(Node* node)
+{
+ int r = 0;
+
+ switch (NTYPE(node)) {
+ case N_LIST:
+ case N_ALT:
+ do {
+ r = numbered_ref_check(NCONS(node).left);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+ case N_QUALIFIER:
+ r = numbered_ref_check(NQUALIFIER(node).target);
+ break;
+ case N_EFFECT:
+ r = numbered_ref_check(NEFFECT(node).target);
+ break;
+
+ case N_BACKREF:
+ if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Remove unnamed capture groups from the tree at *root and renumber the
+ * named ones, then bring env and reg in line with the new numbering.
+ * Steps: build an old->new map with noname_disable_map(), rewrite
+ * back-references with renumber_by_map(), compact the env memory-node
+ * table, translate env->capture_history, and set both num_mem fields to
+ * env->num_named.
+ * Returns 0 on success or the first non-zero code from a helper.
+ *
+ * NOTE(review): map has env->num_mem + 1 entries and only 1..num_mem are
+ * initialised, yet the capture-history loop reads map[i] for any set bit up
+ * to ONIG_MAX_CAPTURE_HISTORY_GROUP.  This relies on capture_history never
+ * having a bit set above num_mem - confirm against the parser. */
+static int
+disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
+{
+ int r, i, pos, counter;
+ BitStatusType loc;
+ NumMap* map;
+
+ map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
+ CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
+ for (i = 1; i <= env->num_mem; i++) {
+ map[i].new_val = 0;
+ }
+ counter = 0;
+ r = noname_disable_map(root, map, &counter);
+ if (r != 0) return r;
+
+ r = renumber_by_map(*root, map);
+ if (r != 0) return r;
+
+ /* Move the node pointer of every surviving group down to its new slot;
+  * pos never exceeds i, so no unread entry is overwritten. */
+ for (i = 1, pos = 1; i <= env->num_mem; i++) {
+ if (map[i].new_val > 0) {
+ SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
+ pos++;
+ }
+ }
+
+ /* Rebuild the capture-history bit set under the new numbering. */
+ loc = env->capture_history;
+ BIT_STATUS_CLEAR(env->capture_history);
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(loc, i)) {
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
+ }
+ }
+
+ env->num_mem = env->num_named;
+ reg->num_mem = env->num_named;
+ return 0;
+}
+#endif /* USE_NAMED_GROUP */
+
#ifdef USE_SUBEXP_CALL
static int
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
@@ -1598,17 +1649,17 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
int i, offset;
EffectNode* en;
AbsAddrType addr;
-#ifndef UNALIGNED_WORD_ACCESS
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
UChar buf[SERIALIZE_BUFSIZE];
#endif
for (i = 0; i < uslist->num; i++) {
en = &(NEFFECT(uslist->us[i].target));
- if (! IS_EFFECT_ADDR_FIXED(en)) return REGERR_PARSER_BUG;
+ if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
addr = en->call_addr;
offset = uslist->us[i].offset;
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
#else
SERIALIZE_ABSADDR(addr, buf);
@@ -1619,10 +1670,79 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
}
#endif
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+/* Classify a qualifier target for the empty-loop check.
+ * Returns NQ_TARGET_IS_EMPTY_MEM as soon as an EFFECT_MEMORY node is
+ * reached, NQ_TARGET_IS_EMPTY_REC for a recursive call node, the largest
+ * value found among list/alt children, and 0 when nothing of interest is
+ * found.  Descends through non-recursive calls, qualifiers whose upper
+ * bound is not 0, and option / stop-backtrack effects.
+ * setup_tree() stores a positive result in qn->target_empty_info. */
static int
-get_min_match_length(Node* node, RegDistance *min, ScanEnv* env)
+qualifiers_memory_node_info(Node* node)
{
- RegDistance tmin;
+ int r = 0;
+
+ switch (NTYPE(node)) {
+ case N_LIST:
+ case N_ALT:
+ {
+ int v;
+ /* Keep the maximum over all children; a negative child result
+  * ends the walk. */
+ do {
+ v = qualifiers_memory_node_info(NCONS(node).left);
+ if (v > r) r = v;
+ } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&NCALL(node))) {
+ return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
+ }
+ else
+ r = qualifiers_memory_node_info(NCALL(node).target);
+ break;
+#endif
+
+ case N_QUALIFIER:
+ {
+ QualifierNode* qn = &(NQUALIFIER(node));
+ /* A qualifier with upper == 0 is not descended into. */
+ if (qn->upper != 0) {
+ r = qualifiers_memory_node_info(qn->target);
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+ return NQ_TARGET_IS_EMPTY_MEM;
+ break;
+
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = qualifiers_memory_node_info(en->target);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case N_BACKREF:
+ case N_STRING:
+ case N_CTYPE:
+ case N_CCLASS:
+ case N_ANYCHAR:
+ case N_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+
+static int
+get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
+{
+ OnigDistance tmin;
int r = 0;
*min = 0;
@@ -1636,11 +1756,11 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env)
if (br->state & NST_RECURSION) break;
backs = BACKREFS_P(br);
- if (backs[0] > env->num_mem) return REGERR_INVALID_BACKREF;
+ if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
r = get_min_match_length(nodes[backs[0]], min, env);
if (r != 0) break;
for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return REGERR_INVALID_BACKREF;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
r = get_min_match_length(nodes[backs[i]], &tmin, env);
if (r != 0) break;
if (*min > tmin) *min = tmin;
@@ -1692,10 +1812,6 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env)
switch (NCTYPE(node).type) {
case CTYPE_WORD: *min = 1; break;
case CTYPE_NOT_WORD: *min = 1; break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB: *min = 1; break;
- case CTYPE_WORD_MB: *min = 2; break;
-#endif
default:
break;
}
@@ -1752,9 +1868,9 @@ get_min_match_length(Node* node, RegDistance *min, ScanEnv* env)
}
static int
-get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
+get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
{
- RegDistance tmax;
+ OnigDistance tmax;
int r = 0;
*max = 0;
@@ -1785,18 +1901,9 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
switch (NCTYPE(node).type) {
case CTYPE_WORD:
case CTYPE_NOT_WORD:
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_MB:
-#endif
- *max = mbmaxlen_dist(env->enc);
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- *max = 1;
- break;
-#endif
-
default:
break;
}
@@ -1804,7 +1911,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
case N_CCLASS:
case N_ANYCHAR:
- *max = mbmaxlen_dist(env->enc);
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
case N_BACKREF:
@@ -1814,12 +1921,12 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
Node** nodes = SCANENV_MEM_NODES(env);
BackrefNode* br = &(NBACKREF(node));
if (br->state & NST_RECURSION) {
- *max = INFINITE_DISTANCE;
+ *max = ONIG_INFINITE_DISTANCE;
break;
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return REGERR_INVALID_BACKREF;
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
r = get_max_match_length(nodes[backs[i]], &tmax, env);
if (r != 0) break;
if (*max < tmax) *max = tmax;
@@ -1832,7 +1939,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
if (! IS_CALL_RECURSION(&(NCALL(node))))
r = get_max_match_length(NCALL(node).target, max, env);
else
- *max = INFINITE_DISTANCE;
+ *max = ONIG_INFINITE_DISTANCE;
break;
#endif
@@ -1846,7 +1953,7 @@ get_max_match_length(Node* node, RegDistance *max, ScanEnv* env)
if (! IS_REPEAT_INFINITE(qn->upper))
*max = distance_multiply(*max, qn->upper);
else
- *max = INFINITE_DISTANCE;
+ *max = ONIG_INFINITE_DISTANCE;
}
}
}
@@ -1937,7 +2044,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
StrNode* sn = &(NSTRING(node));
UChar *s = sn->s;
while (s < sn->end) {
- s += mblen(reg->enc, *s);
+ s += enc_len(reg->enc, *s);
(*len)++;
}
}
@@ -1969,10 +2076,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
switch (NCTYPE(node).type) {
case CTYPE_WORD:
case CTYPE_NOT_WORD:
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- case CTYPE_WORD_MB:
-#endif
*len = 1;
break;
}
@@ -2027,12 +2130,35 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
return get_char_length_tree1(node, reg, len, 0);
}
+/* Test whether code point `code` is matched by character class `cc`.
+ * Codes >= SINGLE_BYTE_SIZE are looked up in cc->mbuf with
+ * onig_is_in_code_range() (not found when mbuf is null); smaller codes are
+ * looked up in the bit set cc->bs.  The result is inverted when cc->not is
+ * non-zero.  Returns 1 for a match and 0 otherwise.
+ * The enc parameter is not used in this body. */
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+ int found;
+
+ if (code >= SINGLE_BYTE_SIZE) {
+ if (IS_NULL(cc->mbuf)) {
+ found = 0;
+ }
+ else {
+ found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+ }
+ }
+ else {
+ found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
+ }
+
+ if (cc->not == 0)
+ return found;
+ else
+ return !found;
+}
+
/* x is not included y ==> 1 : 0 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
{
int i, len;
- WCINT wc;
+ OnigCodePoint code;
UChar *p, c;
int ytype;
@@ -2056,11 +2182,6 @@ is_not_included(Node* x, Node* y, regex_t* reg)
else
return 0;
break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- case CTYPE_WORD_MB:
- break;
-#endif
default:
break;
}
@@ -2095,7 +2216,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
if (IS_NULL(xc->mbuf) && xc->not == 0) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(xc->bs, i)) {
- if (IS_SB_WORD(reg->enc, i)) return 0;
+ if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
}
}
return 1;
@@ -2104,7 +2225,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
break;
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! IS_SB_WORD(reg->enc, i)) {
+ if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
if (xc->not == 0) {
if (BITSET_AT(xc->bs, i))
return 0;
@@ -2118,11 +2239,6 @@ is_not_included(Node* x, Node* y, regex_t* reg)
return 1;
break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- case CTYPE_WORD_MB:
- break;
-#endif
default:
break;
}
@@ -2169,19 +2285,11 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case N_CTYPE:
switch (NCTYPE(y).type) {
case CTYPE_WORD:
- return (IS_WORD_STR(reg->enc, xs->s, xs->end) ? 0 : 1);
+ return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
break;
case CTYPE_NOT_WORD:
- return (IS_WORD_STR(reg->enc, xs->s, xs->end) ? 1 : 0);
- break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- return (ismb(reg->enc, c) ? 1 : 0);
+ return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
break;
- case CTYPE_WORD_MB:
- return (ismb(reg->enc, c) ? 0 : 1);
- break;
-#endif
default:
break;
}
@@ -2190,25 +2298,10 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case N_CCLASS:
{
CClassNode* cc = &(NCCLASS(y));
- if (ismb(reg->enc, c)) {
- if (IS_NULL(cc->mbuf))
- return (cc->not == 0 ? 1 : 0);
- else {
- len = mblen(reg->enc, c);
- wc = MB2WC(xs->s, xs->s + len, reg->enc);
- p = cc->mbuf->p + SIZE_BITSET;
- if (regex_is_in_wc_range(p, wc))
- return (cc->not == 0 ? 0 : 1);
- else
- return (cc->not == 0 ? 1 : 0);
- }
- }
- else {
- if (BITSET_AT(cc->bs, c) == 0)
- return (cc->not == 0 ? 1 : 0);
- else
- return (cc->not == 0 ? 0 : 1);
- }
+
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + enc_len(reg->enc, c));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
}
break;
@@ -2219,9 +2312,16 @@ is_not_included(Node* x, Node* y, regex_t* reg)
len = NSTRING_LEN(x);
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
if (NSTRING_IS_CASE_AMBIG(x) || NSTRING_IS_CASE_AMBIG(y)) {
- for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
- if (TOLOWER(reg->enc, *p) != TOLOWER(reg->enc, *q))
- return 1;
+ UChar plow[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar qlow[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ int plen, qlen;
+ for (p = ys->s, q = xs->s; q < xs->end; ) {
+ plen = ONIGENC_MBC_TO_LOWER(reg->enc, p, plow);
+ qlen = ONIGENC_MBC_TO_LOWER(reg->enc, q, qlow);
+ if (plen != qlen || onig_strncmp(plow, qlow, plen) != 0)
+ return 1;
+ p += enc_len(reg->enc, *p);
+ q += enc_len(reg->enc, *q);
}
}
else {
@@ -2279,7 +2379,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
if (exact != 0 &&
!NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- if (! IS_AMBIGUITY_CHAR(reg->enc, *(sn->s)))
+ if (! ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, sn->s))
n = node;
}
else {
@@ -2306,7 +2406,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
switch (en->type) {
case EFFECT_OPTION:
{
- RegOptionType options = reg->options;
+ OnigOptionType options = reg->options;
reg->options = NEFFECT(node).option;
n = get_head_value_node(NEFFECT(node).target, exact, reg);
@@ -2398,7 +2498,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
case N_LIST:
{
Node *x;
- RegDistance min;
+ OnigDistance min;
int ret;
x = node;
@@ -2508,7 +2608,7 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
if (IS_EFFECT_RECURSION(en)) {
SET_EFFECT_STATUS(node, NST_MARK1);
r = subexp_inf_recursive_check(en->target, env, 1);
- if (r > 0) return REGERR_NEVER_ENDING_RECURSION;
+ if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
CLEAR_EFFECT_STATUS(node, NST_MARK1);
}
r = subexp_inf_recursive_check_trav(en->target, env);
@@ -2684,36 +2784,51 @@ setup_subexp_call(Node* node, ScanEnv* env)
CallNode* cn = &(NCALL(node));
Node** nodes = SCANENV_MEM_NODES(env);
-#ifdef USE_NAMED_SUBEXP
- n = regex_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
+#ifdef USE_NAMED_GROUP
+ n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
#else
- n = REGERR_UNDEFINED_GROUP_REFERENCE;
+ n = -1;
#endif
if (n <= 0) {
/* name not found, check group number. (?*ddd) */
p = cn->name;
- num = regex_scan_unsigned_number(&p, cn->name_end, env->enc);
+ num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
if (num <= 0 || p != cn->name_end) {
- regex_scan_env_set_error_string(env,
- REGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return REGERR_UNDEFINED_NAME_REFERENCE;
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+#ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+#endif
+ if (num > env->num_mem) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
}
- if (num > env->num_mem) return REGERR_UNDEFINED_GROUP_REFERENCE;
cn->ref_num = num;
goto set_call_attr;
}
else if (n > 1) {
- regex_scan_env_set_error_string(env,
- REGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return REGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
}
else {
cn->ref_num = refs[0];
set_call_attr:
cn->target = nodes[cn->ref_num];
- if (IS_NULL(cn->target)) return REGERR_INVALID_SUBEXP_NAME;
+ if (IS_NULL(cn->target)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
SET_EFFECT_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->backtrack_mem, cn->ref_num);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
cn->unset_addr_list = env->unset_addr_list;
}
}
@@ -2762,8 +2877,8 @@ divide_look_behind_alternatives(Node* node)
np = node;
while ((np = NCONS(np).right) != NULL_NODE) {
- insert_node = regex_node_new_anchor(anc_type);
- CHECK_NULL_RETURN_VAL(insert_node, REGERR_MEMORY);
+ insert_node = onig_node_new_anchor(anc_type);
+ CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
NANCHOR(insert_node).target = NCONS(np).left;
NCONS(np).left = insert_node;
}
@@ -2787,12 +2902,12 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
if (r == 0)
an->char_len = len;
else if (r == GET_CHAR_LEN_VARLEN)
- r = REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
r = divide_look_behind_alternatives(node);
else
- r = REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
return r;
@@ -2820,8 +2935,8 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(x)) {
y = get_head_value_node(next_node, 0, reg);
if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
- Node* en = regex_node_new_effect(EFFECT_STOP_BACKTRACK);
- CHECK_NULL_RETURN_VAL(en, REGERR_MEMORY);
+ Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
SET_EFFECT_STATUS(en, NST_SIMPLE_REPEAT);
swap_node(node, en);
NEFFECT(node).target = en;
@@ -2846,7 +2961,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
#define IN_REPEAT (1<<2)
/* setup_tree does the following work.
- 1. check empty loop. (set qn->target_may_empty)
+ 1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -2882,13 +2997,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case N_CCLASS:
if (IS_IGNORECASE(reg->options)) {
- int c, t;
+ int i;
+ UChar c, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
BitSetRef bs = NCCLASS(node).bs;
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- t = TOLOWER(reg->enc, c);
- if (t != c) {
- if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, t);
- if (BITSET_AT(bs, t)) BITSET_SET_BIT(bs, c);
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ c = (UChar )i;
+ ONIGENC_MBC_TO_LOWER(reg->enc, &c, lowbuf);
+ if (*lowbuf != c) {
+ if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, *lowbuf);
+ if (BITSET_AT(bs, *lowbuf)) BITSET_SET_BIT(bs, c);
}
}
}
@@ -2900,7 +3017,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
UChar* p = sn->s;
while (p < sn->end) {
- if (IS_AMBIGUITY_CHAR(reg->enc, *p)) {
+ if (ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p)) {
NSTRING_SET_CASE_AMBIG(node);
break;
}
@@ -2926,9 +3043,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
BackrefNode* br = &(NBACKREF(node));
p = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return REGERR_INVALID_BACKREF;
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
- BIT_STATUS_ON_AT(env->backtrack_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
}
}
@@ -2936,7 +3053,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case N_QUALIFIER:
{
- RegDistance d;
+ OnigDistance d;
QualifierNode* qn = &(NQUALIFIER(node));
Node* target = qn->target;
@@ -2944,7 +3061,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = get_min_match_length(target, &d, env);
if (r) break;
if (d == 0) {
- qn->target_may_empty = 1;
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ r = qualifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
+#endif
#if 0
r = get_max_match_length(target, &d, env);
if (r == 0 && d == 0) {
@@ -2974,19 +3098,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int i, n = qn->lower;
- regex_node_conv_to_str_node(node, NSTRING(target).flag);
+ onig_node_conv_to_str_node(node, NSTRING(target).flag);
for (i = 0; i < n; i++) {
- r = regex_node_str_cat(node, sn->s, sn->end);
+ r = onig_node_str_cat(node, sn->s, sn->end);
if (r) break;
}
- regex_node_free(target);
+ onig_node_free(target);
break; /* break case N_QUALIFIER: */
}
}
}
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (qn->greedy && !qn->target_may_empty) {
+ if (qn->greedy && (qn->target_empty_info != 0)) {
if (NTYPE(target) == N_QUALIFIER) {
QualifierNode* tqn = &(NQUALIFIER(target));
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -3009,7 +3133,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
switch (en->type) {
case EFFECT_OPTION:
{
- RegOptionType options = reg->options;
+ OnigOptionType options = reg->options;
reg->options = NEFFECT(node).option;
r = setup_tree(NEFFECT(node).target, reg, state, env);
reg->options = options;
@@ -3018,7 +3142,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case EFFECT_MEMORY:
if ((state & (IN_ALT | IN_NOT | IN_REPEAT)) != 0) {
- BIT_STATUS_ON_AT(env->backtrack_mem, en->regnum);
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
/* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
}
/* fall */
@@ -3073,7 +3197,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
if (r < 0) return r;
- if (r > 0) return REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
r = setup_look_behind(node, reg, env);
if (r != 0) return r;
r = setup_tree(an->target, reg, state, env);
@@ -3085,7 +3209,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
if (r < 0) return r;
- if (r > 0) return REGERR_INVALID_LOOK_BEHIND_PATTERN;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
r = setup_look_behind(node, reg, env);
if (r != 0) return r;
r = setup_tree(an->target, reg, (state | IN_NOT), env);
@@ -3104,18 +3228,21 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
/* set skip map for Boyer-Moor search */
static int
-set_bm_skip(UChar* s, UChar* end, RegCharEncoding enc, int ignore_case,
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case,
UChar skip[], int** int_skip)
{
int i, len;
+ UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
len = end - s;
- if (len < REG_CHAR_TABLE_SIZE) {
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) skip[i] = len;
+ if (len < ONIG_CHAR_TABLE_SIZE) {
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
if (ignore_case) {
- for (i = 0; i < len - 1; i++)
- skip[TOLOWER(enc, s[i])] = len - 1 - i;
+ for (i = 0; i < len - 1; i++) {
+ ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
+ skip[*lowbuf] = len - 1 - i;
+ }
}
else {
for (i = 0; i < len - 1; i++)
@@ -3124,14 +3251,16 @@ set_bm_skip(UChar* s, UChar* end, RegCharEncoding enc, int ignore_case,
}
else {
if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * REG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return REGERR_MEMORY;
+ *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+ if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
}
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
if (ignore_case) {
- for (i = 0; i < len - 1; i++)
- (*int_skip)[TOLOWER(enc, s[i])] = len - 1 - i;
+ for (i = 0; i < len - 1; i++) {
+ ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
+ (*int_skip)[*lowbuf] = len - 1 - i;
+ }
}
else {
for (i = 0; i < len - 1; i++)
@@ -3144,16 +3273,15 @@ set_bm_skip(UChar* s, UChar* end, RegCharEncoding enc, int ignore_case,
#define OPT_EXACT_MAXLEN 24
typedef struct {
- RegDistance min; /* min byte length */
- RegDistance max; /* max byte length */
+ OnigDistance min; /* min byte length */
+ OnigDistance max; /* max byte length */
} MinMaxLen;
typedef struct {
MinMaxLen mmd;
BitStatusType backrefed_status;
- RegCharEncoding enc;
- RegOptionType options;
- RegTransTableType transtable;
+ OnigEncoding enc;
+ OnigOptionType options;
ScanEnv* scan_env;
} OptEnv;
@@ -3177,7 +3305,7 @@ typedef struct {
OptAncInfo anc;
int value; /* weighted value */
- UChar map[REG_CHAR_TABLE_SIZE];
+ UChar map[ONIG_CHAR_TABLE_SIZE];
} OptMapInfo;
typedef struct {
@@ -3230,7 +3358,7 @@ distance_value(MinMaxLen* mm)
int d;
- if (mm->max == INFINITE_DISTANCE) return 0;
+ if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
@@ -3265,7 +3393,7 @@ is_equal_mml(MinMaxLen* a, MinMaxLen* b)
static void
-set_mml(MinMaxLen* mml, RegDistance min, RegDistance max)
+set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
{
mml->min = min;
mml->max = max;
@@ -3292,7 +3420,7 @@ add_mml(MinMaxLen* to, MinMaxLen* from)
}
static void
-add_len_mml(MinMaxLen* to, RegDistance len)
+add_len_mml(MinMaxLen* to, OnigDistance len)
{
to->min = distance_add(to->min, len);
to->max = distance_add(to->max, len);
@@ -3326,7 +3454,7 @@ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
static void
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
- RegDistance left_len, RegDistance right_len)
+ OnigDistance left_len, OnigDistance right_len)
{
clear_opt_anc_info(to);
@@ -3433,7 +3561,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
static void
concat_opt_exact_info_str(OptExactInfo* to,
- UChar* s, UChar* end, int raw, RegCharEncoding code)
+ UChar* s, UChar* end, int raw, OnigEncoding enc)
{
int i, j, len;
UChar *p;
@@ -3443,7 +3571,8 @@ concat_opt_exact_info_str(OptExactInfo* to,
to->s[i++] = *p++;
}
else {
- len = mblen(code, *p);
+ len = enc_len(enc, *p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len; j++)
to->s[i++] = *p++;
}
@@ -3469,7 +3598,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
for (i = 0; i < to->len && i < add->len; ) {
if (to->s[i] != add->s[i]) break;
- len = mblen(env->enc, to->s[i]);
+ len = enc_len(env->enc, to->s[i]);
for (j = 1; j < len; j++) {
if (to->s[i+j] != add->s[i+j]) break;
@@ -3508,7 +3637,7 @@ clear_opt_map_info(OptMapInfo* map)
clear_mml(&map->mmd);
clear_opt_anc_info(&map->anc);
map->value = 0;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
map->map[i] = 0;
}
@@ -3528,19 +3657,23 @@ add_char_opt_map_info(OptMapInfo* map, int c)
}
static void
-add_char_amb_opt_map_info(OptMapInfo* map, int c, RegCharEncoding enc)
+add_char_amb_opt_map_info(OptMapInfo* map, int c, OnigEncoding enc)
{
- int i, t;
+ UChar x, low[ONIGENC_MBC_TO_LOWER_MAXLEN];
add_char_opt_map_info(map, c);
- t = TOLOWER(enc, c);
- if (t != c) {
- add_char_opt_map_info(map, t);
+
+ x = (UChar )c;
+ ONIGENC_MBC_TO_LOWER(enc, &x, low);
+ if (*low != x) {
+ add_char_opt_map_info(map, (int )(*low));
}
else {
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) {
- t = TOLOWER(enc, i);
- if (t == c) add_char_opt_map_info(map, i);
+ int i;
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+ x = (UChar )i;
+ ONIGENC_MBC_TO_LOWER(enc, &x, low);
+ if ((int )(*low) == c) add_char_opt_map_info(map, i);
}
}
}
@@ -3592,7 +3725,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
alt_merge_mml(&to->mmd, &add->mmd);
val = 0;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) {
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
if (add->map[i])
to->map[i] = 1;
@@ -3645,9 +3778,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
}
if (add->map.value > 0 && to->len.max == 0) {
- concat_opt_anc_info(&tanc, &to->anc, &add->map.anc,
- to->len.max, add->len.max);
- copy_opt_anc_info(&add->map.anc, &tanc);
+ if (add->map.mmd.max == 0)
+ add->map.anc.left_anchor |= to->anc.left_anchor;
}
exb_reach = to->exb.reach_end;
@@ -3764,8 +3896,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
else {
for (p = sn->s; p < sn->end; ) {
- len = mblen(env->enc, *p);
- if (len == 1 && IS_AMBIGUITY_CHAR(env->enc, *p)) {
+ len = enc_len(env->enc, *p);
+ if (len == 1 && ONIGENC_IS_MBC_CASE_AMBIG(env->enc, p)) {
break;
}
p += len;
@@ -3790,7 +3922,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (slen > 0) {
if (p == sn->s)
- add_char_amb_opt_map_info(&opt->map, *(sn->s), env->transtable);
+ add_char_amb_opt_map_info(&opt->map, *(sn->s), env->enc);
else
add_char_opt_map_info(&opt->map, *(sn->s));
}
@@ -3805,11 +3937,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case N_CCLASS:
{
- int i, z, len, found;
+ int i, z, len, found, mb_found;
CClassNode* cc = &(NCCLASS(node));
/* no need to check ignore case. (setted in setup_tree()) */
- found = 0;
+ found = mb_found = 0;
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
z = BITSET_AT(cc->bs, i);
if ((z && !cc->not) || (!z && cc->not)) {
@@ -3818,21 +3950,30 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
}
- if (IS_NOT_NULL(cc->mbuf)) {
+ if (IS_NULL(cc->mbuf)) {
+ if (cc->not) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ add_char_opt_map_info(&opt->map, i);
+ }
+ mb_found = 1;
+ }
+ }
+ else {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT((BitSetRef )(cc->mbuf->p), i)) {
- found = 1;
+ z = ONIGENC_IS_MBC_HEAD(env->enc, i);
+ if (z) {
+ mb_found = 1;
add_char_opt_map_info(&opt->map, i);
}
}
}
- if (found) {
- if (IS_NULL(cc->mbuf))
- len = bitset_mbmaxlen(cc->bs, cc->not, env->enc);
- else
- len = mbmaxlen_dist(env->enc);
-
+ if (mb_found) {
+ len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ set_mml(&opt->len, 1, len);
+ }
+ else if (found) {
+ len = 1;
set_mml(&opt->len, 1, len);
}
}
@@ -3843,15 +3984,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
int c;
int len, min, max;
- min = mbmaxlen_dist(env->enc);
+ min = ONIGENC_MBC_MAXLEN_DIST(env->enc);
max = 0;
+#define IS_WORD_HEAD_BYTE(enc,b) \
+ (ONIGENC_IS_MBC_ASCII(&b) ? ONIGENC_IS_CODE_WORD(enc,((OnigCodePoint )b)) \
+ : ONIGENC_IS_MBC_HEAD(enc,b))
+
switch (NCTYPE(node).type) {
case CTYPE_WORD:
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_WORD_HEAD(env->enc, c)) {
+ if (IS_WORD_HEAD_BYTE(env->enc, c)) {
add_char_opt_map_info(&opt->map, c);
- len = mblen(env->enc, c);
+ len = enc_len(env->enc, c);
if (len < min) min = len;
if (len > max) max = len;
}
@@ -3860,36 +4005,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case CTYPE_NOT_WORD:
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_WORD_HEAD(env->enc, c)) {
+ if (! IS_WORD_HEAD_BYTE(env->enc, c)) {
add_char_opt_map_info(&opt->map, c);
- len = mblen(env->enc, c);
+ len = enc_len(env->enc, c);
if (len < min) min = len;
if (len > max) max = len;
}
}
break;
-
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_SB_WORD(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- }
- }
- min = max = 1;
- break;
-
- case CTYPE_WORD_MB:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_MB_WORD(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- len = mblen(env->enc, c);
- if (len < min) min = len;
- if (len > max) max = len;
- }
- }
- break;
-#endif
}
set_mml(&opt->len, min, max);
@@ -3898,7 +4021,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case N_ANYCHAR:
{
- RegDistance len = mbmaxlen_dist(env->enc);
+ OnigDistance len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
set_mml(&opt->len, 1, len);
}
break;
@@ -3944,12 +4067,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
{
int i;
int* backs;
- RegDistance min, max, tmin, tmax;
+ OnigDistance min, max, tmin, tmax;
Node** nodes = SCANENV_MEM_NODES(env->scan_env);
BackrefNode* br = &(NBACKREF(node));
if (br->state & NST_RECURSION) {
- set_mml(&opt->len, 0, INFINITE_DISTANCE);
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
break;
}
backs = BACKREFS_P(br);
@@ -3972,9 +4095,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
#ifdef USE_SUBEXP_CALL
case N_CALL:
if (IS_CALL_RECURSION(&(NCALL(node))))
- set_mml(&opt->len, 0, INFINITE_DISTANCE);
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
else {
+ OnigOptionType save = env->options;
+ env->options = NEFFECT(NCALL(node).target).option;
r = optimize_node_left(NCALL(node).target, opt, env);
+ env->options = save;
}
break;
#endif
@@ -3982,7 +4108,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case N_QUALIFIER:
{
int i;
- RegDistance min, max;
+ OnigDistance min, max;
NodeOptInfo nopt;
QualifierNode* qn = &(NQUALIFIER(node));
@@ -4024,7 +4150,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
min = distance_multiply(nopt.len.min, qn->lower);
if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? INFINITE_DISTANCE : 0);
+ max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
else
max = distance_multiply(nopt.len.max, qn->upper);
@@ -4039,7 +4165,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
switch (en->type) {
case EFFECT_OPTION:
{
- RegOptionType save = env->options;
+ OnigOptionType save = env->options;
env->options = en->option;
r = optimize_node_left(en->target, opt, env);
@@ -4051,10 +4177,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
#ifdef USE_SUBEXP_CALL
en->opt_count++;
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
- RegDistance min, max;
+ OnigDistance min, max;
min = 0;
- max = INFINITE_DISTANCE;
+ max = ONIG_INFINITE_DISTANCE;
if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
set_mml(&opt->len, min, max);
@@ -4079,11 +4205,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
break;
default:
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
fprintf(stderr, "optimize_node_left: undefined node type %d\n",
NTYPE(node));
#endif
- r = REGERR_TYPE_BUG;
+ r = ONIGERR_TYPE_BUG;
break;
}
@@ -4097,22 +4223,32 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (e->len == 0) return 0;
- reg->exact = regex_strdup(e->s, e->s + e->len);
- if (IS_NULL(reg->exact)) return REGERR_MEMORY;
+ reg->exact = onig_strdup(e->s, e->s + e->len);
+ CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
if (e->ignore_case) {
- UChar *p;
- int len;
- for (p = reg->exact; p < reg->exact_end; ) {
- len = mblen(reg->enc, *p);
- if (len == 1) {
- *p = TOLOWER(reg->enc, *p);
+ UChar buf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ int len, low_len, i, j, alloc_size;
+
+ alloc_size = e->len;
+ i = j = 0;
+ while (i < e->len) {
+ low_len = ONIGENC_MBC_TO_LOWER(reg->enc, &(e->s[i]), buf);
+ len = enc_len(reg->enc, e->s[i]);
+ if (low_len > alloc_size - i) {
+ reg->exact = xrealloc(reg->exact, alloc_size * 2);
+ CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+ alloc_size *= 2;
}
- p += len;
+
+ xmemcpy(&(reg->exact[j]), buf, low_len);
+ i += len;
+ j += low_len;
}
- reg->optimize = REG_OPTIMIZE_EXACT_IC;
+ reg->exact_end = reg->exact + j;
+ reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
else {
int allow_reverse;
@@ -4121,7 +4257,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
allow_reverse = 1;
else
allow_reverse =
- regex_is_allow_reverse_match(reg->enc, reg->exact, reg->exact_end);
+ ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 0,
@@ -4129,17 +4265,17 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (r) return r;
reg->optimize = (allow_reverse != 0
- ? REG_OPTIMIZE_EXACT_BM : REG_OPTIMIZE_EXACT_BM_NOT_REV);
+ ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
}
else {
- reg->optimize = REG_OPTIMIZE_EXACT;
+ reg->optimize = ONIG_OPTIMIZE_EXACT;
}
}
reg->dmin = e->mmd.min;
reg->dmax = e->mmd.max;
- if (reg->dmin != INFINITE_DISTANCE) {
+ if (reg->dmin != ONIG_INFINITE_DISTANCE) {
reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
}
@@ -4151,14 +4287,14 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m)
{
int i;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
reg->map[i] = m->map[i];
- reg->optimize = REG_OPTIMIZE_MAP;
+ reg->optimize = ONIG_OPTIMIZE_MAP;
reg->dmin = m->mmd.min;
reg->dmax = m->mmd.max;
- if (reg->dmin != INFINITE_DISTANCE) {
+ if (reg->dmin != ONIG_INFINITE_DISTANCE) {
reg->threshold_len = reg->dmin + 1;
}
}
@@ -4170,7 +4306,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc)
reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
}
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
static void print_optimize_info(FILE* f, regex_t* reg);
#endif
@@ -4222,7 +4358,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
}
-#if defined(REG_DEBUG_COMPILE) || defined(REG_DEBUG_MATCH)
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
print_optimize_info(stderr, reg);
#endif
return r;
@@ -4231,7 +4367,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
static void
clear_optimize_info(regex_t* reg)
{
- reg->optimize = REG_OPTIMIZE_NONE;
+ reg->optimize = ONIG_OPTIMIZE_NONE;
reg->anchor = 0;
reg->anchor_dmin = 0;
reg->anchor_dmax = 0;
@@ -4244,19 +4380,19 @@ clear_optimize_info(regex_t* reg)
}
}
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
static void
-print_distance_range(FILE* f, RegDistance a, RegDistance b)
+print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
{
- if (a == INFINITE_DISTANCE)
+ if (a == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
fprintf(f, "(%u)", a);
fputs("-", f);
- if (b == INFINITE_DISTANCE)
+ if (b == ONIG_INFINITE_DISTANCE)
fputs("inf", f);
else
fprintf(f, "(%u)", b);
@@ -4337,58 +4473,58 @@ print_optimize_info(FILE* f, regex_t* reg)
}
fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
}
- else if (reg->optimize & REG_OPTIMIZE_MAP) {
+ else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
int i, n = 0;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
if (reg->map[i]) n++;
fprintf(f, "map: n=%d\n", n);
if (n > 0) {
fputc('[', f);
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
- if (reg->map[i] && mblen(reg->enc, i) == 1 &&
- IS_CODE_PRINT(reg->enc, i))
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ if (reg->map[i] && enc_len(reg->enc, i) == 1 &&
+ ONIGENC_IS_CODE_PRINT(reg->enc, i))
fputc(i, f);
fprintf(f, "]\n");
}
}
}
-#endif /* REG_DEBUG */
+#endif /* ONIG_DEBUG */
static void
-regex_free_body(regex_t* reg)
+onig_free_body(regex_t* reg)
{
if (IS_NOT_NULL(reg->p)) xfree(reg->p);
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) regex_free(reg->chain);
+ if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
-#ifdef USE_NAMED_SUBEXP
- regex_names_free(reg);
+#ifdef USE_NAMED_GROUP
+ onig_names_free(reg);
#endif
}
extern void
-regex_free(regex_t* reg)
+onig_free(regex_t* reg)
{
if (IS_NOT_NULL(reg)) {
- regex_free_body(reg);
+ onig_free_body(reg);
xfree(reg);
}
}
#define REGEX_TRANSFER(to,from) do {\
- (to)->state = REG_STATE_MODIFY;\
- regex_free_body(to);\
+ (to)->state = ONIG_STATE_MODIFY;\
+ onig_free_body(to);\
xmemcpy(to, from, sizeof(regex_t));\
xfree(from);\
} while (0)
static void
-regex_transfer(regex_t* to, regex_t* from)
+onig_transfer(regex_t* to, regex_t* from)
{
THREAD_ATOMIC_START;
REGEX_TRANSFER(to, from);
@@ -4402,7 +4538,7 @@ regex_transfer(regex_t* to, regex_t* from)
} while (0)
static void
-regex_chain_link_add(regex_t* to, regex_t* add)
+onig_chain_link_add(regex_t* to, regex_t* add)
{
THREAD_ATOMIC_START;
REGEX_CHAIN_HEAD(to);
@@ -4411,7 +4547,7 @@ regex_chain_link_add(regex_t* to, regex_t* add)
}
extern void
-regex_chain_reduce(regex_t* reg)
+onig_chain_reduce(regex_t* reg)
{
regex_t *head, *prev;
@@ -4419,7 +4555,7 @@ regex_chain_reduce(regex_t* reg)
prev = reg;
head = prev->chain;
if (IS_NOT_NULL(head)) {
- reg->state = REG_STATE_MODIFY;
+ reg->state = ONIG_STATE_MODIFY;
while (IS_NOT_NULL(head->chain)) {
prev = head;
head = head->chain;
@@ -4432,37 +4568,36 @@ regex_chain_reduce(regex_t* reg)
#if 0
extern int
-regex_clone(regex_t** to, regex_t* from)
+onig_clone(regex_t** to, regex_t* from)
{
int r, size;
regex_t* reg;
- if (REG_STATE(from) == REG_STATE_NORMAL) {
+ if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
from->state++; /* increment as search counter */
if (IS_NOT_NULL(from->chain)) {
- regex_chain_reduce(from);
+ onig_chain_reduce(from);
from->state++;
}
}
else {
int n = 0;
- while (REG_STATE(from) < REG_STATE_NORMAL) {
+ while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
- return REGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
from->state++; /* increment as search counter */
}
- r = regex_alloc_init(&reg, REG_OPTION_NONE, RegDefaultCharEncoding,
- REG_TRANSTABLE_USE_DEFAULT);
+ r = onig_alloc_init(&reg, ONIG_OPTION_NONE, from->enc, ONIG_SYNTAX_DEFAULT);
if (r != 0) {
from->state--;
return r;
}
- xmemcpy(reg, from, sizeof(regex_t));
- reg->state = REG_STATE_NORMAL;
+ xmemcpy(reg, from, sizeof(onig_t));
+ reg->state = ONIG_STATE_NORMAL;
reg->chain = (regex_t* )NULL;
if (from->p) {
@@ -4479,20 +4614,20 @@ regex_clone(regex_t** to, regex_t* from)
}
if (from->int_map) {
- size = sizeof(int) * REG_CHAR_TABLE_SIZE;
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
reg->int_map = (int* )xmalloc(size);
if (IS_NULL(reg->int_map)) goto mem_error;
xmemcpy(reg->int_map, from->int_map, size);
}
if (from->int_map_backward) {
- size = sizeof(int) * REG_CHAR_TABLE_SIZE;
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
reg->int_map_backward = (int* )xmalloc(size);
if (IS_NULL(reg->int_map_backward)) goto mem_error;
xmemcpy(reg->int_map_backward, from->int_map_backward, size);
}
-#ifdef USE_NAMED_SUBEXP
+#ifdef USE_NAMED_GROUP
reg->name_table = names_clone(from); /* names_clone is not implemented */
#endif
@@ -4502,18 +4637,20 @@ regex_clone(regex_t** to, regex_t* from)
mem_error:
from->state--;
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
}
#endif
-#ifdef REG_DEBUG
-static void print_tree P_((FILE* f, Node* node));
+#ifdef ONIG_DEBUG
static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
#endif
+#ifdef ONIG_DEBUG_PARSE_TREE
+static void print_tree P_((FILE* f, Node* node));
+#endif
extern int
-regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
- RegErrorInfo* einfo)
+onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+ OnigErrorInfo* einfo)
{
#define COMPILE_INIT_SIZE 20
@@ -4524,13 +4661,13 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
UnsetAddrList uslist;
#endif
- reg->state = REG_STATE_COMPILING;
+ reg->state = ONIG_STATE_COMPILING;
if (reg->alloc == 0) {
init_size = (pattern_end - pattern) * 2;
if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
r = BBUF_INIT(reg, init_size);
- if (r) goto end;
+ if (r != 0) goto end;
}
else
reg->used = 0;
@@ -4539,26 +4676,40 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
reg->num_repeat = 0;
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
- reg->repeat_range = (RegRepeatRange* )NULL;
+ reg->repeat_range = (OnigRepeatRange* )NULL;
+
+ r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
+ if (r != 0) goto err;
+
+#ifdef USE_NAMED_GROUP
+ /* mixed use named group and no-named group */
+ if (scan_env.num_named > 0 &&
+ IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+ if (scan_env.num_named != scan_env.num_mem)
+ r = disable_noname_group_capture(&root, reg, &scan_env);
+ else
+ r = numbered_ref_check(root);
- r = regex_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
- if (r) goto err;
+ if (r != 0) goto err;
+ }
+#endif
-#ifdef REG_DEBUG_PARSE_TREE
+#ifdef ONIG_DEBUG_PARSE_TREE
print_tree(stderr, root);
#endif
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_init(&uslist, scan_env.num_call);
- if (r) goto err;
+ if (r != 0) goto err;
scan_env.unset_addr_list = &uslist;
r = setup_subexp_call(root, &scan_env);
- if (r) goto err_unset;
+ if (r != 0) goto err_unset;
r = subexp_recursive_check_trav(root, &scan_env);
- if (r < 0) goto err_unset;
+ if (r < 0) goto err_unset;
r = subexp_inf_recursive_check_trav(root, &scan_env);
- if (r) goto err_unset;
+ if (r != 0) goto err_unset;
reg->num_call = scan_env.num_call;
}
@@ -4567,14 +4718,22 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
#endif
r = setup_tree(root, reg, 0, &scan_env);
- if (r) goto err_unset;
+ if (r != 0) goto err_unset;
- reg->backtrack_mem = scan_env.backtrack_mem;
+ reg->capture_history = scan_env.capture_history;
+ reg->bt_mem_start = scan_env.bt_mem_start;
+ reg->bt_mem_start |= reg->capture_history;
+ if (IS_FIND_CONDITION(reg->options))
+ BIT_STATUS_ON_ALL(reg->bt_mem_end);
+ else {
+ reg->bt_mem_end = scan_env.bt_mem_end;
+ reg->bt_mem_end |= reg->capture_history;
+ }
clear_optimize_info(reg);
-#ifndef REG_DONT_OPTIMIZE
+#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
- if (r) goto err_unset;
+ if (r != 0) goto err_unset;
#endif
if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
@@ -4593,10 +4752,10 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
}
#endif
- if ((reg->num_repeat != 0) || IS_FIND_CONDITION(reg->options))
+ if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
- if (reg->backtrack_mem != 0)
+ if (reg->bt_mem_start != 0)
reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
else
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
@@ -4607,17 +4766,17 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
unset_addr_list_end(&uslist);
}
#endif
- regex_node_free(root);
+ onig_node_free(root);
-#ifdef REG_DEBUG_COMPILE
-#ifdef USE_NAMED_SUBEXP
- regex_print_names(stderr, reg);
+#ifdef ONIG_DEBUG_COMPILE
+#ifdef USE_NAMED_GROUP
+ onig_print_names(stderr, reg);
#endif
print_compiled_byte_code_list(stderr, reg);
#endif
end:
- reg->state = REG_STATE_NORMAL;
+ reg->state = ONIG_STATE_NORMAL;
return r;
err_unset:
@@ -4634,51 +4793,54 @@ regex_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
}
}
- if (IS_NOT_NULL(root)) regex_node_free(root);
+ if (IS_NOT_NULL(root)) onig_node_free(root);
if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
xfree(scan_env.mem_nodes_dynamic);
return r;
}
extern int
-regex_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
- RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
- RegErrorInfo* einfo)
+onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* einfo)
{
int r;
regex_t *new_reg;
- r = regex_new(&new_reg, pattern, pattern_end, option, code, syntax, einfo);
+ r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
if (r) return r;
- if (REG_STATE(reg) == REG_STATE_NORMAL) {
- regex_transfer(reg, new_reg);
+ if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_transfer(reg, new_reg);
}
else {
- regex_chain_link_add(reg, new_reg);
+ onig_chain_link_add(reg, new_reg);
}
return 0;
}
-static int regex_inited = 0;
+static int onig_inited = 0;
extern int
-regex_alloc_init(regex_t** reg, RegOptionType option, RegCharEncoding enc,
- RegSyntaxType* syntax)
+onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
+ OnigSyntaxType* syntax)
{
- if (! regex_inited)
- regex_init();
+ if (! onig_inited)
+ onig_init();
+
+ if (ONIGENC_IS_UNDEF(enc))
+ return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
*reg = (regex_t* )xmalloc(sizeof(regex_t));
- if (IS_NULL(*reg)) return REGERR_MEMORY;
+ if (IS_NULL(*reg)) return ONIGERR_MEMORY;
- if ((option & REG_OPTION_NEGATE_SINGLELINE) != 0) {
+ if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
- option &= ~REG_OPTION_SINGLELINE;
+ option &= ~ONIG_OPTION_SINGLELINE;
}
else
option |= syntax->options;
- (*reg)->state = REG_STATE_NORMAL;
+ (*reg)->state = ONIG_STATE_NORMAL;
(*reg)->enc = enc;
(*reg)->options = option;
(*reg)->syntax = syntax;
@@ -4697,82 +4859,65 @@ regex_alloc_init(regex_t** reg, RegOptionType option, RegCharEncoding enc,
}
extern int
-regex_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
- RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax,
- RegErrorInfo* einfo)
+onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* einfo)
{
int r;
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
- r = regex_alloc_init(reg, option, code, syntax);
+ r = onig_alloc_init(reg, option, enc, syntax);
if (r) return r;
- r = regex_compile(*reg, pattern, pattern_end, einfo);
+ r = onig_compile(*reg, pattern, pattern_end, einfo);
if (r) {
- regex_free(*reg);
+ onig_free(*reg);
*reg = NULL;
}
return r;
}
-extern void
-regex_set_default_trans_table(UChar* table)
+extern int
+onig_init()
{
- int i;
+ if (onig_inited != 0)
+ return 0;
- if (table && table != DefaultTransTable) {
- DefaultTransTable = table;
+ onig_inited = 1;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
- AmbiguityTable[i] = 0;
+ THREAD_ATOMIC_START;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) {
- AmbiguityTable[table[i]]++;
- if (table[i] != i)
- AmbiguityTable[i] += 2;
- }
- }
-}
+ onigenc_init();
+ onigenc_set_default_caseconv_table((UChar* )0);
-extern int
-regex_init()
-{
- regex_inited = 1;
-
- THREAD_ATOMIC_START;
-#ifdef DEFAULT_TRANSTABLE_EXIST
- if (! DefaultTransTable) /* check re_set_casetable() called already. */
- regex_set_default_trans_table(DTT);
+#ifdef ONIG_DEBUG_STATISTICS
+ onig_statistics_init();
#endif
-#ifdef REG_DEBUG_STATISTICS
- regex_statistics_init();
-#endif
THREAD_ATOMIC_END;
-
return 0;
}
extern int
-regex_end()
+onig_end()
{
-#ifdef REG_DEBUG_STATISTICS
- regex_print_statistics(stderr);
+#ifdef ONIG_DEBUG_STATISTICS
+ onig_print_statistics(stderr);
#endif
#ifdef USE_RECYCLE_NODE
- regex_free_node_list();
+ onig_free_node_list();
#endif
- regex_inited = 0;
+ onig_inited = 0;
return 0;
}
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
-RegOpInfoType RegOpInfo[] = {
+OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON },
{ OP_EXACT1, "exact1", ARG_SPECIAL },
@@ -4796,8 +4941,11 @@ RegOpInfoType RegOpInfo[] = {
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
{ OP_WORD, "word", ARG_NON },
{ OP_NOT_WORD, "not-word", ARG_NON },
{ OP_WORD_SB, "word-sb", ARG_NON },
@@ -4816,7 +4964,9 @@ RegOpInfoType RegOpInfo[] = {
{ OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREF3, "backref3", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_MEMNUM },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
{ OP_MEMORY_START, "mem-start", ARG_MEMNUM },
{ OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
@@ -4837,6 +4987,8 @@ RegOpInfoType RegOpInfo[] = {
{ OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
{ OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
{ OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
{ OP_PUSH_POS, "push-pos", ARG_NON },
{ OP_POP_POS, "pop-pos", ARG_NON },
{ OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
@@ -4856,9 +5008,9 @@ op2name(int opcode)
{
int i;
- for (i = 0; RegOpInfo[i].opcode >= 0; i++) {
- if (opcode == RegOpInfo[i].opcode)
- return RegOpInfo[i].name;
+ for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+ if (opcode == OnigOpInfo[i].opcode)
+ return OnigOpInfo[i].name;
}
return "";
}
@@ -4868,9 +5020,9 @@ op2arg_type(int opcode)
{
int i;
- for (i = 0; RegOpInfo[i].opcode >= 0; i++) {
- if (opcode == RegOpInfo[i].opcode)
- return RegOpInfo[i].arg_type;
+ for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+ if (opcode == OnigOpInfo[i].opcode)
+ return OnigOpInfo[i].arg_type;
}
return ARG_SPECIAL;
}
@@ -4899,13 +5051,13 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
}
extern void
-regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
{
int i, n, arg_type;
RelAddrType addr;
LengthType len;
MemNumType mem;
- WCINT wc;
+ OnigCodePoint code;
UChar *q;
fprintf(f, "[%s", op2name(*bp));
@@ -4935,7 +5087,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
break;
case ARG_OPTION:
{
- RegOptionType option = *((RegOptionType* )bp);
+ OnigOptionType option = *((OnigOptionType* )bp);
bp += SIZE_OPTION;
fprintf(f, ":%d", option);
}
@@ -4946,6 +5098,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
switch (*bp++) {
case OP_EXACT1:
case OP_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
p_string(f, 1, bp++); break;
case OP_EXACT2:
p_string(f, 2, bp); bp += 2; break;
@@ -5014,12 +5167,12 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef UNALIGNED_WORD_ACCESS
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
#endif
- GET_WCINT(wc, q);
+ GET_CODE_POINT(code, q);
bp += len;
- fprintf(f, ":%d:%d", (int )wc, len);
+ fprintf(f, ":%d:%d", (int )code, len);
break;
case OP_CCLASS_MIX:
@@ -5028,15 +5181,16 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
bp += SIZE_BITSET;
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef UNALIGNED_WORD_ACCESS
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
#endif
- GET_WCINT(wc, q);
+ GET_CODE_POINT(code, q);
bp += len;
- fprintf(f, ":%d:%d:%d", n, (int )wc, len);
+ fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
case OP_BACKREF_MULTI:
+ case OP_BACKREF_MULTI_IC:
fputs(" ", f);
GET_LENGTH_INC(len, bp);
for (i = 0; i < len; i++) {
@@ -5078,7 +5232,7 @@ regex_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
break;
default:
- fprintf(stderr, "regex_print_compiled_byte_code: undefined code %d\n",
+ fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);
}
}
@@ -5104,7 +5258,7 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
else
fputs(" ", f);
}
- regex_print_compiled_byte_code(f, bp, &bp);
+ onig_print_compiled_byte_code(f, bp, &bp);
}
fprintf(f, "\n");
@@ -5145,7 +5299,13 @@ print_indent_tree(FILE* f, Node* node, int indent)
case N_STRING:
fprintf(f, "<string%s:%x>",
(NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
- for (p = NSTRING(node).s; p < NSTRING(node).end; p++) fputc(*p, f);
+ for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
+ if (*p >= 0x20 && *p < 0x7f)
+ fputc(*p, f);
+ else {
+ fprintf(f, " 0x%02x", *p);
+ }
+ }
break;
case N_CCLASS:
@@ -5171,10 +5331,6 @@ print_indent_tree(FILE* f, Node* node, int indent)
switch (NCTYPE(node).type) {
case CTYPE_WORD: fputs("word", f); break;
case CTYPE_NOT_WORD: fputs("not word", f); break;
-#ifdef USE_SBMB_CLASS
- case CTYPE_WORD_SB: fputs("word-sb", f); break;
- case CTYPE_WORD_MB: fputs("word-mb", f); break;
-#endif
default:
fprintf(f, "ERROR: undefined ctype.\n");
exit(0);
@@ -5273,10 +5429,12 @@ print_indent_tree(FILE* f, Node* node, int indent)
fprintf(f, "\n");
fflush(f);
}
+#endif /* ONIG_DEBUG */
+#ifdef ONIG_DEBUG_PARSE_TREE
static void
print_tree(FILE* f, Node* node)
{
print_indent_tree(f, node, 0);
}
-#endif /* REG_DEBUG */
+#endif
diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c
new file mode 100644
index 0000000000..7e9c640bb6
--- /dev/null
+++ b/ext/mbstring/oniguruma/regenc.c
@@ -0,0 +1,586 @@
+/**********************************************************************
+
+ regenc.c - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#include "regenc.h"
+
+OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
+
+/* Set up the encoding layer.  Nothing needs preparing at present, so the
+ * call simply reports success (0). */
+extern int
+onigenc_init()
+{
+  const int status = 0;
+
+  return status;
+}
+
+/* Return the encoding currently stored in OnigEncDefaultCharEncoding. */
+extern OnigEncoding
+onigenc_get_default_encoding()
+{
+  OnigEncoding current = OnigEncDefaultCharEncoding;
+
+  return current;
+}
+
+/* Store enc in OnigEncDefaultCharEncoding.  Always returns 0. */
+extern int
+onigenc_set_default_encoding(OnigEncoding enc)
+{
+  const int status = 0;
+
+  OnigEncDefaultCharEncoding = enc;
+  return status;
+}
+
+/* Left-adjust s to a character head.  If that head is not before s it is
+ * returned as is; otherwise the position enc_len() bytes past the head is
+ * returned. */
+extern UChar*
+onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  if (head >= s)
+    return head;
+
+  return head + enc_len(enc, *head);
+}
+
+/* Same adjustment as onigenc_get_right_adjust_char_head().  In addition,
+ * when prev is non-NULL, *prev receives the left-adjusted head if it lies
+ * before s, and NULL otherwise. */
+extern UChar*
+onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
+                                   UChar* start, UChar* s, UChar** prev)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  if (head >= s) {
+    /* No earlier head is reported in this case. */
+    if (prev) *prev = (UChar* )NULL; /* Sorry */
+    return head;
+  }
+
+  if (prev) *prev = head;
+  return head + enc_len(enc, *head);
+}
+
+/* Return the character head obtained by left-adjusting from s - 1, or NULL
+ * when s is at or before start. */
+extern UChar*
+onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  if (s > start)
+    return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+
+  return (UChar* )NULL;
+}
+
+/* Move s back by up to n character heads.  Returns NULL as soon as a step
+ * would have to start at or before start; otherwise returns the position
+ * reached (s itself when n <= 0). */
+extern UChar*
+onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
+{
+  int remaining;
+
+  for (remaining = n; ONIG_IS_NOT_NULL(s) && remaining > 0; remaining--) {
+    if (s <= start)
+      return (UChar* )NULL;
+
+    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+  }
+  return s;
+}
+
+
+#ifndef ONIG_RUBY_M17N
+
+#ifndef NOT_RUBY
+#define USE_APPLICATION_TO_LOWER_CASE_TABLE
+#endif
+
+UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
+
+#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+static UChar BuiltInAsciiToLowerCaseTable[] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
+
+unsigned short OnigEncAsciiCtypeTable[256] = {
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
+ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
+ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
+ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
+ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
+ 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
+ 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/* Install table as OnigEncAsciiToLowerCaseTable.  A null table selects the
+ * built-in table when one is compiled in; when the application is expected
+ * to supply the table, a null argument makes the call a no-op. */
+extern void
+onigenc_set_default_caseconv_table(UChar* table)
+{
+#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+  if (table == (UChar* )0)
+    table = BuiltInAsciiToLowerCaseTable;
+#else
+  if (table == (UChar* )0)
+    return ;
+#endif
+
+  if (OnigEncAsciiToLowerCaseTable != table) {
+    OnigEncAsciiToLowerCaseTable = table;
+  }
+}
+
+/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD() macro. */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  return head;
+}
+
+/* "Nothing" implementation: leaves *codes untouched and returns 0. */
+extern int
+onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
+{
+  const int count = 0;
+
+  return count;
+}
+
+/* "Nothing" implementation: leaves *info untouched and always returns -1. */
+extern int
+onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
+                                    OnigEncFoldMatchInfo** info)
+{
+  const int not_found = -1;
+
+  return not_found;
+}
+
+/* "Nothing" implementation: writes none of the output parameters and always
+ * returns -1. */
+extern int
+onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
+         OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+{
+  const int no_range = -1;
+
+  return no_range;
+}
+
+/* for single byte encodings */
+/* Store the lower-case table entry for the byte at p into *lower.
+ * Returns the byte length of the converted character, which is always 1. */
+extern int
+onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
+{
+  UChar src = *p;
+
+  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(src);
+  return 1;
+}
+
+/* Apply ONIGENC_IS_ASCII_CODE_CASE_AMBIG() to the byte at p. */
+extern int
+onigenc_ascii_mbc_is_case_ambig(UChar* p)
+{
+  UChar c = *p;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(c);
+}
+
+/* The code point of a single-byte character is the byte value itself;
+ * end is not consulted. */
+extern OnigCodePoint
+onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
+{
+  return (OnigCodePoint )p[0];
+}
+
+/* Always returns 1, whatever the value of code. */
+extern int
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
+{
+  const int len = 1;
+
+  return len;
+}
+
+/* Return the low 8 bits of code. */
+extern int
+onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
+{
+  return (int )(code & 0xff);
+}
+
+/* Write the low 8 bits of code to buf[0].  Returns 1. */
+extern int
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (code & 0xff);
+
+  return 1;
+}
+
+/* Returns s unchanged; start is not consulted. */
+extern UChar*
+onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
+{
+  UChar* head = s;
+
+  return head;
+}
+
+/* Always answers TRUE; neither argument is inspected. */
+extern int
+onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
+{
+  const int allowed = TRUE;
+
+  return allowed;
+}
+
+/* Build a code point from the character at p by concatenating its bytes,
+ * most significant first.  The byte count comes from enc_len() of the first
+ * byte; reading stops early if end is reached. */
+extern OnigCodePoint
+onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
+{
+  int lead = *p++;
+  int total = enc_len(enc, lead);
+  int taken = 1;
+  OnigCodePoint code = lead;
+
+  while (taken < total && p < end) {
+    int next = *p++;
+
+    code = (code << 8) + next;
+    taken++;
+  }
+  return code;
+}
+
+/* Lower-case the character at p into lower.  An ASCII character is mapped
+ * through the lower-case table; any other character is copied unchanged
+ * (the copy is skipped when lower and p are the same buffer).
+ * Returns the byte length of the character written. */
+extern int
+onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
+{
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    return 1;
+  }
+
+  len = enc_len(enc, *p);
+  if (lower == p)
+    return len;
+
+  /* byte-wise forward copy, in place of memcpy(lower, p, len) */
+  for (i = 0; i < len; i++)
+    lower[i] = p[i];
+
+  return len;
+}
+
+/* Only ASCII characters can be case-ambiguous here; anything else is FALSE. */
+extern int
+onigenc_mbn_mbc_is_case_ambig(UChar* p)
+{
+  if (! ONIGENC_IS_MBC_ASCII(p))
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+}
+
+/* Returns 2 when any bit of the second-lowest byte of code is set, else 1. */
+extern int
+onigenc_mb2_code_to_mbclen(OnigCodePoint code)
+{
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+/* Returns 4, 3, 2 or 1 according to the highest byte of code that has any
+ * bit set (checked from the top byte downwards). */
+extern int
+onigenc_mb4_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xff000000) != 0) return 4;
+  if ((code & 0xff0000) != 0) return 3;
+
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+/* Returns bits 8-15 of code when any of them is set; otherwise returns
+ * code itself converted to int. */
+extern int
+onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
+{
+  if ((code & 0xff00) == 0)
+    return (int )code;
+
+  return (int )((code >> 8) & 0xff);
+}
+
+/* Returns the highest byte of code that has any bit set; when no byte above
+ * the lowest one is set, returns code itself converted to int. */
+extern int
+onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
+{
+  int shift;
+
+  if ((code & 0xff000000) != 0)
+    shift = 24;
+  else if ((code & 0xff0000) != 0)
+    shift = 16;
+  else if ((code & 0xff00) != 0)
+    shift = 8;
+  else
+    return (int )code;
+
+  return (int )((code >> shift) & 0xff);
+}
+
+/* Write code into buf as one or two bytes, high byte first, and return the
+ * number of bytes written.  Returns ONIGERR_INVALID_WIDE_CHAR_VALUE when
+ * that number differs from enc_len() of the first byte written. */
+extern int
+onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int n = 0;
+
+  if ((code & 0xff00) != 0)
+    buf[n++] = ((code >> 8) & 0xff);
+  buf[n++] = (code & 0xff);
+
+  if (enc_len(enc, buf[0]) != n)
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+  return n;
+}
+
+/* Write code into buf as a 1- to 4-byte sequence, most significant byte
+ * first, and return the number of bytes written.  Returns
+ * ONIGERR_INVALID_WIDE_CHAR_VALUE when that number differs from enc_len()
+ * of the first byte written.
+ *
+ * Once a higher-order byte has been emitted, every lower byte is emitted
+ * too, even if it is zero.  Testing each byte only against zero would drop
+ * an interior 0x00 byte (e.g. 0x01004142 would come out as three bytes). */
+extern int
+onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  UChar *p = buf;
+
+  if ((code & 0xff000000) != 0) {
+    *p++ = ((code >> 24) & 0xff);
+  }
+  if ((code & 0xff0000) != 0 || p != buf) {
+    *p++ = ((code >> 16) & 0xff);
+  }
+  if ((code & 0xff00) != 0 || p != buf) {
+    *p++ = ((code >> 8) & 0xff);
+  }
+  *p++ = (code & 0xff);
+
+#if 1
+  if (enc_len(enc, buf[0]) != (p - buf))
+    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+  return p - buf;
+}
+
+/* Test code against the class bits in ctype.
+ *   - code < 128: answered by the ASCII ctype table, using ctype as given.
+ *   - otherwise:  TRUE only when ctype includes ONIGENC_CTYPE_WORD and the
+ *                 first byte of code starts a multibyte character
+ *                 (enc_len() > 1); FALSE in every other case.
+ * Two statements that stripped the WORD bit used to follow an if/else whose
+ * branches both returned; they could never execute and have been removed. */
+extern int
+onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = onigenc_mb2_code_to_mbc_first(code);
+    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/* Test code against the class bits in ctype.
+ *   - code < 128: answered by the ASCII ctype table, using ctype as given.
+ *   - otherwise:  TRUE only when ctype includes ONIGENC_CTYPE_WORD and the
+ *                 first byte of code starts a multibyte character
+ *                 (enc_len() > 1); FALSE in every other case.
+ * Two statements that stripped the WORD bit used to follow an if/else whose
+ * branches both returned; they could never execute and have been removed. */
+extern int
+onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+    int first = onigenc_mb4_code_to_mbc_first(code);
+    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
+  }
+
+  return FALSE;
+}
+
+/* Point *codes at a static one-element list holding 0xdf and return the
+ * number of entries in that list (1). */
+extern int
+onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
+{
+  static OnigCodePoint fold_codes[] = { 0xdf };
+
+  *codes = fold_codes;
+  return 1;
+}
+
+/* Recognize the fold strings for German ess-tsett (U+00DF) at p.
+ * Returns 1 when p holds the byte 0xdf, 2 when p holds "SS" or "ss" (and
+ * both bytes lie before end), and -1 otherwise.  On a match *info is set to
+ * a static descriptor; on -1 *info is left untouched. */
+extern int
+onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
+                                    OnigEncFoldMatchInfo** info)
+{
+  /* German alphabet ess-tsett(U+00DF) */
+  static OnigEncFoldMatchInfo eszett = {
+    3,
+    { 1, 2, 2 },
+    { "\337", "ss", "SS" } /* 0337: 0xdf */
+  };
+
+  if (p >= end) return -1;
+
+  if (*p == 0xdf) {
+    *info = &eszett;
+    return 1;
+  }
+
+  if (p + 1 >= end)
+    return -1; /* too short for a two-byte fold string */
+
+  if ((p[0] == 'S' && p[1] == 'S') ||
+      (p[0] == 's' && p[1] == 's')) {
+    *info = &eszett;
+    return 2;
+  }
+
+  return -1; /* is not a fold string. */
+}
+
+#else /* ONIG_RUBY_M17N */
+
+/* Answer whether code belongs to the character class ctype under enc.
+ * Each recognized ONIGENC_CTYPE_* value is forwarded to the corresponding
+ * m17n_*() / ONIGENC_IS_CODE_*() predicate; an unrecognized ctype yields 0. */
+extern int
+onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
+{
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:  return m17n_isalpha(enc, code);
+  case ONIGENC_CTYPE_BLANK:  return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
+  case ONIGENC_CTYPE_CNTRL:  return m17n_iscntrl(enc, code);
+  case ONIGENC_CTYPE_DIGIT:  return m17n_isdigit(enc, code);
+  case ONIGENC_CTYPE_GRAPH:  return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
+  case ONIGENC_CTYPE_LOWER:  return m17n_islower(enc, code);
+  case ONIGENC_CTYPE_PRINT:  return m17n_isprint(enc, code);
+  case ONIGENC_CTYPE_PUNCT:  return m17n_ispunct(enc, code);
+  case ONIGENC_CTYPE_SPACE:  return m17n_isspace(enc, code);
+  case ONIGENC_CTYPE_UPPER:  return m17n_isupper(enc, code);
+  case ONIGENC_CTYPE_XDIGIT: return m17n_isxdigit(enc, code);
+  case ONIGENC_CTYPE_WORD:   return m17n_iswchar(enc, code);
+  case ONIGENC_CTYPE_ASCII:  return (code < 128 ? TRUE : FALSE);
+  case ONIGENC_CTYPE_ALNUM:  return m17n_isalnum(enc, code);
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+/* Store code into buf via m17n_mbcput() and return enc_len() of the first
+ * byte reported by m17n_firstbyte(). */
+extern int
+onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int lead;
+
+  m17n_mbcput(enc, code, buf);
+  lead = m17n_firstbyte(enc, code);
+
+  return enc_len(enc, lead);
+}
+
+/* Decode the character at p, lower-case it with m17n_tolower(), store the
+ * result in buf and return m17n_codelen() of the lowered code. */
+extern int
+onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+{
+  unsigned int src = m17n_codepoint(enc, p, p + enc_len(enc, *p));
+  unsigned int lowered = m17n_tolower(enc, src);
+
+  m17n_mbcput(enc, lowered, buf);
+  return m17n_codelen(enc, lowered);
+}
+
+/* TRUE when the character at p is upper- or lower-case under enc. */
+extern int
+onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
+{
+  unsigned int code = m17n_codepoint(enc, p, p + enc_len(enc, *p));
+
+  return (m17n_isupper(enc, code) || m17n_islower(enc, code)) ? TRUE : FALSE;
+}
+
+/* Find the head of the character containing s.  Scan back from s to a lead
+ * byte (or to start), then walk forward one character at a time.  Returns s
+ * when a character ends exactly at s, otherwise the start of the character
+ * that reaches past s. */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar *head;
+  int len;
+
+  if (s <= start) return s;
+
+  head = s;
+  while (!m17n_islead(enc, *head) && head > start) head--;
+
+  for (;;) {
+    len = enc_len(enc, *head);
+    if (head + len >= s)
+      break;
+    head += len;
+  }
+
+  return (head + len == s) ? s : head;
+}
+
+/* Returns ONIGENC_IS_SINGLEBYTE(enc); s and end are not inspected. */
+extern int
+onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
+{
+  int allowed = ONIGENC_IS_SINGLEBYTE(enc);
+
+  return allowed;
+}
+
+/* Intentionally empty in the ONIG_RUBY_M17N build: table is ignored. */
+extern void
+onigenc_set_default_caseconv_table(UChar* table)
+{
+}
+
+#endif /* ONIG_RUBY_M17N */
diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h
new file mode 100644
index 0000000000..935080a950
--- /dev/null
+++ b/ext/mbstring/oniguruma/regenc.h
@@ -0,0 +1,97 @@
+/**********************************************************************
+
+ regenc.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef REGENC_H
+#define REGENC_H
+
+#ifndef ONIG_SOURCE_IS_WRAPPED
+#include "config.h"
+#endif
+
+#include "oniguruma.h"
+
+#ifndef NULL
+#define NULL ((void* )0)
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* error codes */
+/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
+/* syntax error [-400, -999] */
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+
+/* Newline character and the test for it. */
+#define ONIG_NEWLINE '\n'
+#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
+/* Null tests that work for any pointer type by comparing through void*. */
+#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
+#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+/* Early-return helpers.  Each expands to a bare `if (...) return ...` with
+ * no trailing semicolon and no do/while wrapper, so the caller supplies the
+ * semicolon and must not place an `else` directly after the macro. */
+#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
+#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+
+
+#ifdef ONIG_RUBY_M17N
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
+
+#else /* ONIG_RUBY_M17N */
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+
+/* for encoding system implementation (internal) */
+ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
+ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
+ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
+
+/* methods for single byte encoding */
+ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
+ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
+ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
+
+/* methods for multi byte encoding */
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
+ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
+ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+
+ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
+ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
+
+#endif /* is not ONIG_RUBY_M17N */
+
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+
+/* Table-lookup helpers for ASCII codes.  Every macro parameter is
+ * parenthesized so that an expression argument (for example a ctype mask
+ * written as A | B) is applied as a whole rather than being split by
+ * operator precedence. */
+#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) (OnigEncAsciiToLowerCaseTable[(c)])
+#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
+ ((OnigEncAsciiCtypeTable[(code)] & (ctype)) != 0)
+#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
+ ONIGENC_IS_ASCII_CODE_CTYPE((code), (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
+
+#endif /* REGENC_H */
diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c
index a1e86c34f1..5a6c31b82e 100644
--- a/ext/mbstring/oniguruma/regerror.c
+++ b/ext/mbstring/oniguruma/regerror.c
@@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
@@ -17,118 +17,136 @@
#endif
extern char*
-regex_error_code_to_format(int code)
+onig_error_code_to_format(int code)
{
char *p;
if (code >= 0) return (char* )0;
switch (code) {
- case REG_MISMATCH:
+ case ONIG_MISMATCH:
p = "mismatch"; break;
- case REG_NO_SUPPORT_CONFIG:
+ case ONIG_NO_SUPPORT_CONFIG:
p = "no support in this configuration"; break;
- case REGERR_MEMORY:
+ case ONIGERR_MEMORY:
p = "fail to memory allocation"; break;
- case REGERR_MATCH_STACK_LIMIT_OVER:
+ case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
- case REGERR_TYPE_BUG:
+ case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
- case REGERR_PARSER_BUG:
+ case ONIGERR_PARSER_BUG:
p = "internal parser error (bug)"; break;
- case REGERR_STACK_BUG:
+ case ONIGERR_STACK_BUG:
p = "stack error (bug)"; break;
- case REGERR_UNDEFINED_BYTECODE:
+ case ONIGERR_UNDEFINED_BYTECODE:
p = "undefined bytecode (bug)"; break;
- case REGERR_UNEXPECTED_BYTECODE:
+ case ONIGERR_UNEXPECTED_BYTECODE:
p = "unexpected bytecode (bug)"; break;
- case REGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
+ case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
p = "default multibyte-encoding is not setted"; break;
- case REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
+ case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
p = "can't convert to wide-char on specified multibyte-encoding"; break;
- case REGERR_END_PATTERN_AT_LEFT_BRACE:
+ case ONIGERR_INVALID_ARGUMENT:
+ p = "invalid argument"; break;
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
p = "end pattern at left brace"; break;
- case REGERR_END_PATTERN_AT_LEFT_BRACKET:
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
p = "end pattern at left bracket"; break;
- case REGERR_EMPTY_CHAR_CLASS:
+ case ONIGERR_EMPTY_CHAR_CLASS:
p = "empty char-class"; break;
- case REGERR_PREMATURE_END_OF_CHAR_CLASS:
+ case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
- case REGERR_END_PATTERN_AT_BACKSLASH:
+ case ONIGERR_END_PATTERN_AT_BACKSLASH:
p = "end pattern at backslash"; break;
- case REGERR_END_PATTERN_AT_META:
+ case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
- case REGERR_END_PATTERN_AT_CONTROL:
+ case ONIGERR_END_PATTERN_AT_CONTROL:
p = "end pattern at control"; break;
- case REGERR_META_CODE_SYNTAX:
+ case ONIGERR_META_CODE_SYNTAX:
p = "illegal meta-code syntax"; break;
- case REGERR_CONTROL_CODE_SYNTAX:
+ case ONIGERR_CONTROL_CODE_SYNTAX:
p = "illegal control-code syntax"; break;
- case REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
+ case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
p = "char-class value at end of range"; break;
- case REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
+ case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
p = "char-class value at start of range"; break;
- case REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
+ case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
p = "unmatched range specifier in char-class"; break;
- case REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
p = "target of repeat operator is not specified"; break;
- case REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
p = "target of repeat operator is invalid"; break;
- case REGERR_NESTED_REPEAT_OPERATOR:
+ case ONIGERR_NESTED_REPEAT_OPERATOR:
p = "nested repeat operator"; break;
- case REGERR_UNMATCHED_CLOSE_PARENTHESIS:
+ case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
p = "unmatched close parenthesis"; break;
- case REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
+ case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
p = "end pattern with unmatched parenthesis"; break;
- case REGERR_END_PATTERN_IN_GROUP:
+ case ONIGERR_END_PATTERN_IN_GROUP:
p = "end pattern in group"; break;
- case REGERR_UNDEFINED_GROUP_OPTION:
+ case ONIGERR_UNDEFINED_GROUP_OPTION:
p = "undefined group option"; break;
- case REGERR_INVALID_POSIX_BRACKET_TYPE:
+ case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
p = "invalid POSIX bracket type"; break;
- case REGERR_INVALID_LOOK_BEHIND_PATTERN:
+ case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
p = "invalid pattern in look-behind"; break;
- case REGERR_INVALID_REPEAT_RANGE_PATTERN:
+ case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
p = "invalid repeat range {lower,upper}"; break;
- case REGERR_TOO_BIG_NUMBER:
+ case ONIGERR_TOO_BIG_NUMBER:
p = "too big number"; break;
- case REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
+ case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
p = "too big number for repeat range"; break;
- case REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
+ case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
p = "upper is smaller than lower in repeat range"; break;
- case REGERR_EMPTY_RANGE_IN_CHAR_CLASS:
+ case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
p = "empty range in char class"; break;
- case REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
+ case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
p = "mismatch multibyte code length in char-class range"; break;
- case REGERR_TOO_MANY_MULTI_BYTE_RANGES:
+ case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
p = "too many multibyte code ranges are specified"; break;
- case REGERR_TOO_SHORT_MULTI_BYTE_STRING:
+ case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
p = "too short multibyte code string"; break;
- case REGERR_TOO_BIG_BACKREF_NUMBER:
+ case ONIGERR_TOO_BIG_BACKREF_NUMBER:
p = "too big backref number"; break;
- case REGERR_INVALID_BACKREF:
-#ifdef USE_NAMED_SUBEXP
+ case ONIGERR_INVALID_BACKREF:
+#ifdef USE_NAMED_GROUP
p = "invalid backref number/name"; break;
#else
p = "invalid backref number"; break;
#endif
- case REGERR_TOO_BIG_WIDE_CHAR_VALUE:
+ case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
+ p = "numbered backref/call is not allowed. (use name)"; break;
+ case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
- case REGERR_TOO_LONG_WIDE_CHAR_VALUE:
+ case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
- case REGERR_INVALID_WIDE_CHAR_VALUE:
+ case ONIGERR_INVALID_WIDE_CHAR_VALUE:
p = "invalid wide-char value"; break;
- case REGERR_INVALID_SUBEXP_NAME:
- p = "invalid subexp name"; break;
- case REGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_EMPTY_GROUP_NAME:
+ p = "group name is empty"; break;
+ case ONIGERR_INVALID_GROUP_NAME:
+ p = "invalid group name <%n>"; break;
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+#ifdef USE_NAMED_GROUP
+ p = "invalid char in group name <%n>"; break;
+#else
+ p = "invalid char in group number <%n>"; break;
+#endif
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
p = "undefined name <%n> reference"; break;
- case REGERR_UNDEFINED_GROUP_REFERENCE:
- p = "undefined group reference"; break;
- case REGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ p = "undefined group <%n> reference"; break;
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ p = "multiplex defined name <%n>"; break;
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
p = "multiplex definition name <%n> call"; break;
- case REGERR_NEVER_ENDING_RECURSION:
+ case ONIGERR_NEVER_ENDING_RECURSION:
p = "never ending recursion"; break;
- case REGERR_OVER_THREAD_PASS_LIMIT_COUNT:
+ case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
+ p = "group number is too big for capture history"; break;
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ p = "invalid character property name"; break;
+ case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
default:
@@ -139,31 +157,36 @@ regex_error_code_to_format(int code)
}
-/* for REG_MAX_ERROR_MESSAGE_LEN */
+/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
extern int
#ifdef HAVE_STDARG_PROTOTYPES
-regex_error_code_to_str(UChar* s, int code, ...)
+onig_error_code_to_str(UChar* s, int code, ...)
#else
-regex_error_code_to_str(UChar* s, code, va_alist)
+onig_error_code_to_str(s, code, va_alist)
+ UChar* s;
int code;
va_dcl
#endif
{
UChar *p, *q;
- RegErrorInfo* einfo;
+ OnigErrorInfo* einfo;
int len;
va_list vargs;
va_init_list(vargs, code);
switch (code) {
- case REGERR_UNDEFINED_NAME_REFERENCE:
- case REGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- einfo = va_arg(vargs, RegErrorInfo*);
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_INVALID_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ einfo = va_arg(vargs, OnigErrorInfo*);
len = einfo->par_end - einfo->par;
- q = regex_error_code_to_format(code);
+ q = onig_error_code_to_format(code);
p = s;
while (*q != '\0') {
if (*q == '%') {
@@ -194,7 +217,7 @@ regex_error_code_to_str(UChar* s, code, va_alist)
break;
default:
- q = regex_error_code_to_format(code);
+ q = onig_error_code_to_format(code);
len = strlen(q);
xmemcpy(s, q, len);
s[len] = '\0';
@@ -208,13 +231,13 @@ regex_error_code_to_str(UChar* s, code, va_alist)
void
#ifdef HAVE_STDARG_PROTOTYPES
-regex_snprintf_with_pattern(char buf[], int bufsize, RegCharEncoding enc,
+onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
char* pat, char* pat_end, char *fmt, ...)
#else
-regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
+onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
char buf[];
int bufsize;
- RegCharEncoding enc;
+ OnigEncoding enc;
char* pat;
char* pat_end;
const char *fmt;
@@ -222,7 +245,7 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
#endif
{
int n, need, len;
- char *p, *s;
+ UChar *p, *s;
va_list args;
va_init_list(args, fmt);
@@ -236,21 +259,22 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
s = buf + strlen(buf);
p = pat;
- while (p < pat_end) {
- if (*p == '\\') {
+ while (p < (UChar* )pat_end) {
+ if (*p == MC_ESC) {
*s++ = *p++;
- len = mblen(enc, *p);
+ len = enc_len(enc, *p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
- *s++ = '\\';
+ *s++ = MC_ESC;
*s++ = *p++;
}
- else if (ismb(enc, *p)) {
- len = mblen(enc, *p);
+ else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
+ len = enc_len(enc, *p);
while (len-- > 0) *s++ = *p++;
}
- else if (!IS_PRINT(*p) && !IS_SPACE(*p)) {
+ else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
+ !ONIGENC_IS_CODE_SPACE(enc, *p)) {
char b[5];
sprintf(b, "\\%03o", *p & 0377);
len = strlen(b);
diff --git a/ext/mbstring/oniguruma/regex.c b/ext/mbstring/oniguruma/regex.c
index 0c4a43be9e..764b3963d9 100644
--- a/ext/mbstring/oniguruma/regex.c
+++ b/ext/mbstring/oniguruma/regex.c
@@ -2,15 +2,27 @@
regex.c - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*
* Source wrapper for Ruby.
*/
+#define ONIG_SOURCE_IS_WRAPPED
+
+#include "regint.h"
+#include "regex.h"
#include "regparse.c"
#include "regcomp.c"
#include "regexec.c"
+#include "regenc.c"
#include "reggnu.c"
#include "regerror.c"
+
+#ifndef ONIG_RUBY_M17N
+#include "enc/ascii.c"
+#include "enc/utf8.c"
+#include "enc/euc_jp.c"
+#include "enc/sjis.c"
+#endif
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
index b7319ac4fb..c8772ba1f6 100644
--- a/ext/mbstring/oniguruma/regexec.c
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -2,44 +2,79 @@
regexec.c - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
-static UChar*
-get_right_adjust_char_head_with_prev(RegCharEncoding code,
- UChar* start, UChar* s, UChar** prev);
-static UChar*
-step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n);
+static void
+region_list_clear(OnigRegion** list)
+{
+ int i;
+ if (IS_NOT_NULL(list)) {
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (IS_NOT_NULL(list[i])) {
+ xfree(list[i]);
+ list[i] = (OnigRegion* )0;
+ }
+ }
+ }
+}
+
+static void
+region_list_free(OnigRegion* r)
+{
+ if (IS_NOT_NULL(r->list)) {
+ region_list_clear(r->list);
+ xfree(r->list);
+ r->list = (OnigRegion** )0;
+ }
+}
+
+static OnigRegion**
+region_list_new()
+{
+ int i;
+ OnigRegion** list;
+
+ list = (OnigRegion** )xmalloc(sizeof(OnigRegion*)
+ * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1));
+ CHECK_NULL_RETURN(list);
+ for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ list[i] = (OnigRegion* )0;
+ }
+
+ return list;
+}
extern void
-regex_region_clear(RegRegion* region)
+onig_region_clear(OnigRegion* region)
{
int i;
for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = REG_REGION_NOTPOS;
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
+ region_list_clear(region->list);
}
extern int
-regex_region_resize(RegRegion* region, int n)
+onig_region_resize(OnigRegion* region, int n)
{
int i;
region->num_regs = n;
- if (n < REG_NREGION)
- n = REG_NREGION;
+ if (n < ONIG_NREGION)
+ n = ONIG_NREGION;
if (region->allocated == 0) {
region->beg = (int* )xmalloc(n * sizeof(int));
region->end = (int* )xmalloc(n * sizeof(int));
if (region->beg == 0 || region->end == 0)
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
region->allocated = n;
}
@@ -48,38 +83,111 @@ regex_region_resize(RegRegion* region, int n)
region->end = (int* )xrealloc(region->end, n * sizeof(int));
if (region->beg == 0 || region->end == 0)
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
region->allocated = n;
}
for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = REG_REGION_NOTPOS;
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+
+ if (IS_NOT_NULL(region->list))
+ region_list_clear(region->list);
+
+ return 0;
+}
+
+static int
+region_ensure_size(OnigRegion* region, int n)
+{
+ int i, new_size;
+
+ if (region->allocated >= n)
+ return 0;
+
+ new_size = region->allocated;
+ if (new_size == 0)
+ new_size = ONIG_NREGION;
+ while (new_size < n)
+ new_size *= 2;
+
+ if (region->allocated == 0) {
+ region->beg = (int* )xmalloc(new_size * sizeof(int));
+ region->end = (int* )xmalloc(new_size * sizeof(int));
+ if (region->beg == 0 || region->end == 0)
+ return ONIGERR_MEMORY;
+
+ region->allocated = new_size;
+ }
+ else if (region->allocated < new_size) {
+ region->beg = (int* )xrealloc(region->beg, new_size * sizeof(int));
+ region->end = (int* )xrealloc(region->end, new_size * sizeof(int));
+ if (region->beg == 0 || region->end == 0)
+ return ONIGERR_MEMORY;
+
+ region->allocated = new_size;
+ }
+
+ for (i = region->num_regs; i < n; i++) {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
return 0;
}
+static int
+region_list_add_entry(OnigRegion* region, int group, int start, int end)
+{
+ int r, pos;
+ OnigRegion** list;
+
+ if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP)
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+ if (IS_NULL(region->list)) {
+ region->list = region_list_new();
+ CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY);
+ }
+
+ list = region->list;
+ if (IS_NULL(list[group])) {
+ list[group] = onig_region_new();
+ CHECK_NULL_RETURN_VAL(list[group], ONIGERR_MEMORY);
+ }
+
+ r = region_ensure_size(list[group], list[group]->num_regs + 1);
+ if (r != 0) return r;
+
+ pos = list[group]->num_regs;
+ list[group]->beg[pos] = start;
+ list[group]->end[pos] = end;
+ list[group]->num_regs++;
+
+ return 0;
+}
+
static void
-regex_region_init(RegRegion* region)
+onig_region_init(OnigRegion* region)
{
region->num_regs = 0;
region->allocated = 0;
region->beg = (int* )0;
region->end = (int* )0;
+ region->list = (OnigRegion** )0;
}
-extern RegRegion*
-regex_region_new()
+extern OnigRegion*
+onig_region_new()
{
- RegRegion* r;
+ OnigRegion* r;
- r = (RegRegion* )xmalloc(sizeof(RegRegion));
- regex_region_init(r);
+ r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+ onig_region_init(r);
return r;
}
extern void
-regex_region_free(RegRegion* r, int free_self)
+onig_region_free(OnigRegion* r, int free_self)
{
if (r) {
if (r->allocated > 0) {
@@ -87,12 +195,13 @@ regex_region_free(RegRegion* r, int free_self)
if (r->end) xfree(r->end);
r->allocated = 0;
}
+ region_list_free(r);
if (free_self) xfree(r);
}
}
extern void
-regex_region_copy(RegRegion* to, RegRegion* from)
+onig_region_copy(OnigRegion* to, OnigRegion* from)
{
#define RREGC_SIZE (sizeof(int) * from->num_regs)
int i;
@@ -117,6 +226,29 @@ regex_region_copy(RegRegion* to, RegRegion* from)
to->end[i] = from->end[i];
}
to->num_regs = from->num_regs;
+
+ if (IS_NOT_NULL(from->list)) {
+ if (IS_NULL(to->list)) {
+ to->list = region_list_new();
+ }
+
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (IS_NOT_NULL(from->list[i])) {
+ if (IS_NULL(to->list[i]))
+ to->list[i] = onig_region_new();
+
+ onig_region_copy(to->list[i], from->list[i]);
+ }
+ else {
+ if (IS_NOT_NULL(to->list[i])) {
+ xfree(to->list[i]);
+ to->list[i] = (OnigRegion* )0;
+ }
+ }
+ }
+ }
+ else
+ region_list_free(to);
}
@@ -173,13 +305,14 @@ typedef struct _StackType {
#define STK_MEM_END 0x0300
#define STK_REPEAT_INC 0x0400
/* avoided by normal-POP */
-#define STK_POS 0x0500 /* used when POP-POS */
-#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_POS 0x0500 /* used when POP-POS */
+#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
#define STK_REPEAT 0x0700
#define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900
#define STK_MEM_END_MARK 0x0a00
-#define STK_VOID 0x0b00 /* for fill a blank */
+#define STK_VOID 0x0b00 /* for fill a blank */
+#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
/* stack type check mask */
#define STK_MASK_POP_USED 0x00ff
@@ -189,8 +322,8 @@ typedef struct _StackType {
typedef struct {
void* stack_p;
int stack_n;
- RegOptionType options;
- RegRegion* region;
+ OnigOptionType options;
+ OnigRegion* region;
UChar* start; /* search start position (for \G: BEGIN_POSITION) */
} MatchArg;
@@ -245,18 +378,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
if (IS_NULL(x)) {
STACK_SAVE;
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
}
xmemcpy(x, stk_base, n * sizeof(StackType));
n *= 2;
}
else {
n *= 2;
- if (n > MATCH_STACK_LIMIT_SIZE) return REGERR_MATCH_STACK_LIMIT_OVER;
+ if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
}
}
*arg_stk = x + (stk - stk_base);
@@ -365,6 +498,24 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while (0)
+#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
+ int level = 0;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) (start) = k->u.mem.pstr;\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
+ level--;\
+ if (level == 0) {\
+ (end) = k->u.mem.pstr;\
+ break;\
+ }\
+ }\
+ k++;\
+ }\
+} while (0)
+
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_NULL_CHECK_START;\
@@ -373,6 +524,13 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
STACK_INC;\
} while(0)
+#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_END;\
+ stk->u.null_check.num = (cnum);\
+ STACK_INC;\
+} while(0)
+
#define STACK_PUSH_CALL_FRAME(pat) do {\
STACK_ENSURE(1);\
stk->type = STK_CALL_FRAME;\
@@ -387,7 +545,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p) \
if ((p) < stk_base) goto stack_error;
#else
@@ -522,6 +680,114 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while(0)
+#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ else level--;\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ level++;\
+ }\
+ }\
+} while(0)
+
+#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ }\
+ }\
+} while(0)
+
+#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ else {\
+ level--;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ if (k->u.null_check.num == (id)) level++;\
+ }\
+ }\
+} while(0)
+
#define STACK_GET_REPEAT(id, k) do {\
int level = 0;\
k = stk;\
@@ -559,68 +825,63 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
-#define CASETABLE_TOLOWER(c) (casetable[c])
-
-/* byte_code is already converted to lower-case at string compile time */
-#define SBTRANSCMP(byte_code,c) (byte_code == CASETABLE_TOLOWER(c))
-
#define STRING_CMP(s1,s2,len) do {\
- if (ignore_case) {\
- int slen; \
- while (len > 0) {\
- slen = mblen(encode, *s1); \
- if (slen == 1) {\
- if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) \
- goto fail;\
- s1++; s2++; len--; \
- } \
- else {\
- len -= slen; \
- while (slen-- > 0) { \
- if (*s1++ != *s2++) goto fail;\
- } \
- }\
- }\
- }\
- else {\
- while (len-- > 0) {\
- if (*s1++ != *s2++) goto fail;\
- }\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) goto fail;\
}\
} while(0)
+#define STRING_CMP_IC(s1,ps2,len) do {\
+ if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+ goto fail; \
+} while(0)
+
+static int string_cmp_ic(OnigEncoding enc,
+ UChar* s1, UChar** ps2, int mblen)
+{
+ UChar buf1[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar buf2[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *p1, *p2, *end, *s2;
+ int len1, len2;
+
+ s2 = *ps2;
+ end = s1 + mblen;
+ while (s1 < end) {
+ len1 = ONIGENC_MBC_TO_LOWER(enc, s1, buf1);
+ len2 = ONIGENC_MBC_TO_LOWER(enc, s2, buf2);
+ if (len1 != len2) return 0;
+ p1 = buf1;
+ p2 = buf2;
+ while (len1-- > 0) {
+ if (*p1 != *p2) return 0;
+ p1++;
+ p2++;
+ }
+
+ s1 += enc_len(enc, *s1);
+ s2 += enc_len(enc, *s2);
+ }
+
+ *ps2 = s2;
+ return 1;
+}
+
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
is_fail = 0;\
- if (ignore_case) {\
- int slen; \
- while (len > 0) {\
- slen = mblen(encode, *s1); \
- if (slen == 1) {\
- if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) {\
- is_fail = 1; break;\
- }\
- s1++; s2++; len--; \
- } \
- else {\
- len -= slen; \
- while (slen-- > 0) { \
- if (*s1++ != *s2++) {\
- is_fail = 1; break;\
- }\
- } \
- if (is_fail != 0) break;\
- }\
- }\
- }\
- else {\
- while (len-- > 0) {\
- if (*s1++ != *s2++) {\
- is_fail = 1; break;\
- }\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) {\
+ is_fail = 1; break;\
}\
}\
} while(0)
+#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+ is_fail = 1; \
+ else \
+ is_fail = 0; \
+} while(0)
+
#define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end)
#define IS_EMPTY_STR (str == end)
@@ -630,7 +891,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
-#ifdef REG_DEBUG_STATISTICS
+#ifdef ONIG_DEBUG_STATISTICS
#define USE_TIMEOFDAY
@@ -673,7 +934,15 @@ static int MaxStackDepth = 0;
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while (0)
-extern void regex_statistics_init()
+#ifdef RUBY_PLATFORM
+static VALUE onig_stat_print()
+{
+ onig_print_statistics(stderr);
+ return Qnil;
+}
+#endif
+
+extern void onig_statistics_init()
{
int i;
for (i = 0; i < 256; i++) {
@@ -682,26 +951,18 @@ extern void regex_statistics_init()
MaxStackDepth = 0;
#ifdef RUBY_PLATFORM
- rb_define_global_function("regex_stat_print", regex_stat_print, 0);
+ ONIG_RUBY_DEFINE_GLOBAL_FUNCTION("onig_stat_print", onig_stat_print, 0);
#endif
}
-#ifdef RUBY_PLATFORM
-static VALUE regex_stat_print()
-{
- regex_print_statistics(stderr);
- return Qnil;
-}
-#endif
-
extern void
-regex_print_statistics(FILE* f)
+onig_print_statistics(FILE* f)
{
int i;
fprintf(f, " count prev time\n");
- for (i = 0; RegOpInfo[i].opcode >= 0; i++) {
+ for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
fprintf(f, "%8d: %8d: %10ld: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], RegOpInfo[i].name);
+ OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
}
fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
}
@@ -720,24 +981,24 @@ regex_print_statistics(FILE* f)
#endif
extern int
-regex_is_in_wc_range(UChar* p, WCINT wc)
+onig_is_in_code_range(UChar* p, OnigCodePoint code)
{
- WCINT n, *data;
+ OnigCodePoint n, *data;
int low, high, x;
- GET_WCINT(n, p);
- data = (WCINT* )p;
+ GET_CODE_POINT(n, p);
+ data = (OnigCodePoint* )p;
data++;
for (low = 0, high = n; low < high; ) {
x = (low + high) >> 1;
- if (wc > data[x * 2 + 1])
+ if (code > data[x * 2 + 1])
low = x + 1;
else
high = x;
}
- return ((low < n && wc >= data[low * 2]) ? 1 : 0);
+ return ((low < n && code >= data[low * 2]) ? 1 : 0);
}
@@ -757,13 +1018,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
{
static UChar FinishCode[] = { OP_FINISH };
- int i, n, num_mem, best_len, pop_level, find_cond;
+ int i, n, num_mem, best_len, pop_level;
LengthType tlen, tlen2;
MemNumType mem;
RelAddrType addr;
- RegOptionType option = reg->options;
- RegCharEncoding encode = reg->enc;
- unsigned char* casetable = DefaultTransTable;
+ OnigOptionType option = reg->options;
+ OnigEncoding encode = reg->enc;
int ignore_case;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
@@ -776,7 +1036,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
ignore_case = IS_IGNORECASE(option);
- find_cond = IS_FIND_CONDITION(option);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
repeat_stk = (StackIndex* )alloca_base;
@@ -791,7 +1050,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
}
-#ifdef REG_DEBUG_MATCH
+#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
(int )str, (int )end, (int )sstart, (int )sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
@@ -799,17 +1058,17 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
#endif
STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
- best_len = REG_MISMATCH;
+ best_len = ONIG_MISMATCH;
s = sstart;
while (1) {
-#ifdef REG_DEBUG_MATCH
+#ifdef ONIG_DEBUG_MATCH
{
UChar *q, *bp, buf[50];
int len;
fprintf(stderr, "%4d> \"", (int )(s - str));
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
- len = mblen(encode, *q);
+ len = enc_len(encode, *q);
while (len-- > 0) *bp++ = *q++;
}
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
@@ -817,7 +1076,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
*bp = 0;
fputs(buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- regex_print_compiled_byte_code(stderr, p, NULL);
+ onig_print_compiled_byte_code(stderr, p, NULL);
fprintf(stderr, "\n");
}
#endif
@@ -827,9 +1086,10 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_END: STAT_OP_IN(OP_END);
n = s - sstart;
if (n > best_len) {
- RegRegion* region = msa->region;
+ OnigRegion* region = msa->region;
best_len = n;
if (region) {
+#ifdef USE_POSIX_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
@@ -837,46 +1097,71 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
rmt[0].rm_eo = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->backtrack_mem, i))
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
else
rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
- rmt[i].rm_eo = (find_cond != 0
+ rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
? STACK_AT(mem_end_stk[i])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[i])) - str;
}
else {
- rmt[i].rm_so = rmt[i].rm_eo = REG_REGION_NOTPOS;
+ rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
}
}
}
else {
+#endif /* USE_POSIX_REGION_OPTION */
region->beg[0] = sstart - str;
region->end[0] = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->backtrack_mem, i))
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
else
region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
- region->end[i] = (find_cond != 0
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
? STACK_AT(mem_end_stk[i])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[i])) - str;
}
else {
- region->beg[i] = region->end[i] = REG_REGION_NOTPOS;
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
}
- }
- }
- }
+
+ if (reg->capture_history != 0) {
+ UChar *pstart, *pend;
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(reg->capture_history, i) != 0) {
+ stkp = stk_base;
+ do {
+ STACK_GET_MEM_RANGE(stkp, i, pstart, pend);
+ if (stkp < stk) {
+ int r;
+ r = region_list_add_entry(region, i,
+ pstart - str, pend - str);
+ if (r) {
+ STACK_SAVE;
+ return r;
+ }
+ }
+ stkp++;
+ } while (stkp < stk);
+ }
+ }
+ } /* list of captures */
+#ifdef USE_POSIX_REGION_OPTION
+ } /* else IS_POSIX_REGION() */
+#endif
+ } /* if (region) */
+ } /* n > best_len */
STAT_OP_OUT;
- if (find_cond) {
+ if (IS_FIND_CONDITION(option)) {
if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
- best_len = REG_MISMATCH;
+ best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
if (IS_FIND_LONGEST(option) && s < end) {
@@ -902,9 +1187,19 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
- if (! SBTRANSCMP(*p, *s)) goto fail;
- DATA_ENSURE(1);
- p++; s++;
+ {
+ int len;
+ UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+ len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
+ DATA_ENSURE(len);
+ q = lowbuf;
+ s += enc_len(encode, *s);
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
STAT_OP_OUT;
break;
@@ -976,13 +1271,26 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
- GET_LENGTH_INC(tlen, p);
- DATA_ENSURE(tlen);
- while (tlen-- > 0) {
- if (! SBTRANSCMP(*p, *s)) goto fail;
- p++; s++;
+ {
+ int len;
+ UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
+
+ while (p < endp) {
+ len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
+ DATA_ENSURE(len);
+ sprev = s;
+ s += enc_len(encode, *s);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
}
- sprev = s - 1;
+
STAT_OP_OUT;
continue;
break;
@@ -1078,31 +1386,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
- s += mblen(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */
+ s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */
STAT_OP_OUT;
break;
case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
- if (! ismb(encode, *s)) goto fail;
+ if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail;
cclass_mb:
GET_LENGTH_INC(tlen, p);
{
- WCINT wc;
+ OnigCodePoint code;
UChar *ss;
- int mb_len = mblen(encode, *s);
+ int mb_len = enc_len(encode, *s);
DATA_ENSURE(mb_len);
ss = s;
s += mb_len;
- wc = MB2WC(ss, s, encode);
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-#ifdef UNALIGNED_WORD_ACCESS
- if (! regex_is_in_wc_range(p, wc)) goto fail;
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (! onig_is_in_code_range(p, code)) goto fail;
#else
q = p;
ALIGNMENT_RIGHT(q);
- if (! regex_is_in_wc_range(q, wc)) goto fail;
+ if (! onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -1111,7 +1419,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
- if (ismb(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
p += SIZE_BITSET;
goto cclass_mb;
}
@@ -1131,12 +1439,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
- s += mblen(encode, *s);
+ s += enc_len(encode, *s);
STAT_OP_OUT;
break;
case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
- if (! ismb(encode, *s)) {
+ if (! ONIGENC_IS_MBC_HEAD(encode, *s)) {
DATA_ENSURE(1);
s++;
GET_LENGTH_INC(tlen, p);
@@ -1147,9 +1455,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
cclass_mb_not:
GET_LENGTH_INC(tlen, p);
{
- WCINT wc;
+ OnigCodePoint code;
UChar *ss;
- int mb_len = mblen(encode, *s);
+ int mb_len = enc_len(encode, *s);
if (s + mb_len > end) {
s = end;
@@ -1159,14 +1467,14 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
ss = s;
s += mb_len;
- wc = MB2WC(ss, s, encode);
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-#ifdef UNALIGNED_WORD_ACCESS
- if (regex_is_in_wc_range(p, wc)) goto fail;
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (onig_is_in_code_range(p, code)) goto fail;
#else
q = p;
ALIGNMENT_RIGHT(q);
- if (regex_is_in_wc_range(q, wc)) goto fail;
+ if (onig_is_in_code_range(q, code)) goto fail;
#endif
}
p += tlen;
@@ -1177,7 +1485,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
- if (ismb(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
p += SIZE_BITSET;
goto cclass_mb_not;
}
@@ -1194,51 +1502,56 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
- DATA_ENSURE(1);
- if (ismb(encode, *s)) {
- n = mblen(encode, *s);
+ n = enc_len(encode, *s);
+ if (n > 1) {
DATA_ENSURE(n);
s += n;
}
else {
- if (! IS_MULTILINE(option)) {
- if (IS_NEWLINE(*s)) goto fail;
- }
+ DATA_ENSURE(1);
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
s++;
}
STAT_OP_OUT;
break;
+ case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
+ n = enc_len(encode, *s);
+ DATA_ENSURE(n);
+ s += n;
+ STAT_OP_OUT;
+ break;
+
case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
- if (! IS_MULTILINE(option)) {
- while (s < end) {
- STACK_PUSH_ALT(p, s, sprev);
- if (ismb(encode, *s)) {
- n = mblen(encode, *s);
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- if (IS_NEWLINE(*s)) goto fail;
- sprev = s;
- s++;
- }
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
+ sprev = s;
+ s++;
}
}
- else {
- while (s < end) {
- STACK_PUSH_ALT(p, s, sprev);
- if (ismb(encode, *s)) {
- n = mblen(encode, *s);
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, *s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
}
}
STAT_OP_OUT;
@@ -1249,16 +1562,14 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- if (ismb(encode, *s)) {
- n = mblen(encode, *s);
+ n = enc_len(encode, *s);
+ if (n > 1) {
DATA_ENSURE(n);
sprev = s;
s += n;
}
else {
- if (! IS_MULTILINE(option)) {
- if (IS_NEWLINE(*s)) goto fail;
- }
+ if (ONIG_IS_NEWLINE(*s)) goto fail;
sprev = s;
s++;
}
@@ -1267,53 +1578,57 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
break;
- case OP_WORD: STAT_OP_IN(OP_WORD);
- DATA_ENSURE(1);
- if (! IS_WORD_STR_INC(encode, s, end))
- goto fail;
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ while (s < end) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enc_len(encode, *s);
+ if (n >1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
STAT_OP_OUT;
break;
- case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
+ case OP_WORD: STAT_OP_IN(OP_WORD);
DATA_ENSURE(1);
- if (IS_WORD_STR_INC(encode, s, end))
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- STAT_OP_OUT;
- break;
-#ifdef USE_SBMB_CLASS
- case OP_WORD_SB: STAT_OP_IN(OP_WORD_SB);
- DATA_ENSURE(1);
- if (! IS_SB_WORD(encode, *s))
- goto fail;
- s++;
+ s += enc_len(encode, *s);
STAT_OP_OUT;
break;
- case OP_WORD_MB: STAT_OP_IN(OP_WORD_MB);
+ case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
DATA_ENSURE(1);
- if (! IS_MB_WORD(encode, *s))
+ if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- n = mblen(encode, *s);
- DATA_ENSURE(n);
- s += n;
+ s += enc_len(encode, *s);
STAT_OP_OUT;
break;
-#endif
case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
DATA_ENSURE(1);
- if (! IS_WORD_STR(encode, s, end))
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
}
else if (ON_STR_END(s)) {
- if (! IS_WORD_STR(encode, sprev, end))
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
goto fail;
}
else {
- if (IS_WORD_STR(encode, s, end) == IS_WORD_STR(encode, sprev, end))
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
goto fail;
}
STAT_OP_OUT;
@@ -1322,15 +1637,16 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end))
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
}
else if (ON_STR_END(s)) {
- if (IS_WORD_STR(encode, sprev, end))
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
goto fail;
}
else {
- if (IS_WORD_STR(encode, s, end) != IS_WORD_STR(encode, sprev, end))
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
goto fail;
}
STAT_OP_OUT;
@@ -1339,8 +1655,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
#ifdef USE_WORD_BEGIN_END
case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
- if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !IS_WORD_STR(encode, sprev, end)) {
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
STAT_OP_OUT;
continue;
}
@@ -1349,8 +1665,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
- if (!ON_STR_BEGIN(s) && IS_WORD_STR(encode, sprev, end)) {
- if (ON_STR_END(s) || !IS_WORD_STR(encode, s, end)) {
+ if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
STAT_OP_OUT;
continue;
}
@@ -1379,7 +1695,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
continue;
}
- else if (IS_NEWLINE(*sprev) && !ON_STR_END(s)) {
+ else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) {
STAT_OP_OUT;
continue;
}
@@ -1389,7 +1705,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
STAT_OP_OUT;
@@ -1398,7 +1714,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- else if (IS_NEWLINE(*s)) {
+ else if (ONIG_IS_NEWLINE(*s)) {
STAT_OP_OUT;
continue;
}
@@ -1408,7 +1724,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
STAT_OP_OUT;
@@ -1417,7 +1733,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- if (IS_NEWLINE(*s) && ON_STR_END(s+1)) {
+ if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) {
STAT_OP_OUT;
continue;
}
@@ -1463,9 +1779,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
#ifdef USE_SUBEXP_CALL
case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_END(mem, s);
STACK_GET_MEM_START(mem, stkp);
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- STACK_PUSH_MEM_END(mem, s);
STAT_OP_OUT;
continue;
break;
@@ -1509,19 +1825,51 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (BIT_STATUS_AT(reg->backtrack_mem, mem))
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (find_cond != 0
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
DATA_ENSURE(n);
sprev = s;
STRING_CMP(pstart, s, n);
- while (sprev + (len = mblen(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
+ GET_MEMNUM_INC(mem, p);
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(pstart, &s, n);
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
sprev += len;
STAT_OP_OUT;
@@ -1541,12 +1889,12 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (BIT_STATUS_AT(reg->backtrack_mem, mem))
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (find_cond != 0
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
@@ -1556,7 +1904,46 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = mblen(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enc_len(encode, *sprev)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -1598,10 +1985,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_NULL_CHECK(isnull, mem, s);
if (isnull) {
-#ifdef REG_DEBUG_MATCH
+#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
(int )mem, (int )s);
#endif
+ null_check_found:
/* empty loop founded, skip next instruction */
switch (*p++) {
case OP_JUMP:
@@ -1622,6 +2010,56 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
continue;
break;
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+#else
+ STACK_NULL_CHECK_REC(isnull, mem, s);
+#endif
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
case OP_JUMP: STAT_OP_IN(OP_JUMP);
GET_RELADDR_INC(addr, p);
p += addr;
@@ -1757,16 +2195,21 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
stkp = STACK_AT(si);
#endif
stkp->u.repeat.count++;
- if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
}
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(stkp->u.repeat.pcode, s, sprev);
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
}
- else {
- p = stkp->u.repeat.pcode;
- }
- STACK_PUSH_REPEAT_INC(si);
}
STAT_OP_OUT;
continue;
@@ -1814,9 +2257,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
GET_LENGTH_INC(tlen, p);
- s = MBBACK(encode, str, s, (int )tlen);
+ s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
- sprev = regex_get_prev_char_head(encode, str, s);
+ sprev = onigenc_get_prev_char_head(encode, str, s);
STAT_OP_OUT;
continue;
break;
@@ -1824,7 +2267,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
- q = MBBACK(encode, str, s, (int )tlen);
+ q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(q)) {
/* too short case -> success. ex. /(?<!XXX)a/.match("a")
If you want to change to fail, replace following line. */
@@ -1834,7 +2277,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
else {
STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
s = q;
- sprev = regex_get_prev_char_head(encode, str, s);
+ sprev = onigenc_get_prev_char_head(encode, str, s);
}
STAT_OP_OUT;
continue;
@@ -1889,184 +2332,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STACK_SAVE;
return best_len;
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
stack_error:
STACK_SAVE;
- return REGERR_STACK_BUG;
+ return ONIGERR_STACK_BUG;
#endif
bytecode_error:
STACK_SAVE;
- return REGERR_UNDEFINED_BYTECODE;
+ return ONIGERR_UNDEFINED_BYTECODE;
unexpected_bytecode_error:
STACK_SAVE;
- return REGERR_UNEXPECTED_BYTECODE;
-}
-
-
-UChar* DefaultTransTable = (UChar* )0;
-
-#ifndef REG_RUBY_M17N
-static const char SJIS_FOLLOW_TABLE[SINGLE_BYTE_SIZE] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
-};
-
-#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
-#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-#define sjis_ismbfirst(c) ismb(REGCODE_SJIS, (c))
-#define sjis_ismbtrail(c) SJIS_FOLLOW_TABLE[(c)]
-
-extern WCINT
-regex_mb2wc(UChar* p, UChar* end, RegCharEncoding code)
-{
- int c, i, len;
- WCINT n;
-
- if (code == REGCODE_UTF8) {
- c = *p++;
- len = mblen(code,c);
- if (len > 1) {
- len--;
- n = c & ((1 << (6 - len)) - 1);
- while (len--) {
- c = *p++;
- n = (n << 6) | (c & ((1 << 6) - 1));
- }
- }
- else
- n = c;
- }
- else {
- c = *p++;
- len = mblen(code,c);
- n = c;
- if (len == 1) return n;
-
- for (i = 1; i < len; i++) {
- if (p >= end) break;
- c = *p++;
- n <<= 8; n += c;
- }
- }
- return n;
-}
-#endif /* REG_RUBY_M17N */
-
-extern UChar*
-regex_get_left_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s)
-{
- UChar *p;
- int len;
-
- if (s <= start) return s;
- p = s;
-
-#ifdef REG_RUBY_M17N
- while (!m17n_islead(code, *p) && p > start) p--;
- while (p + (len = mblen(code, *p)) < s) {
- p += len;
- }
- if (p + len == s) return s;
- return p;
-#else
-
- if (code == REGCODE_ASCII) {
- return p;
- }
- else if (code == REGCODE_EUCJP) {
- while (!eucjp_islead(*p) && p > start) p--;
- len = mblen(code, *p);
- if (p + len > s) return p;
- p += len;
- return p + ((s - p) & ~1);
- }
- else if (code == REGCODE_SJIS) {
- if (sjis_ismbtrail(*p)) {
- while (p > start) {
- if (! sjis_ismbfirst(*--p)) {
- p++;
- break;
- }
- }
- }
- len = mblen(code, *p);
- if (p + len > s) return p;
- p += len;
- return p + ((s - p) & ~1);
- }
- else { /* REGCODE_UTF8 */
- while (!utf8_islead(*p) && p > start) p--;
- return p;
- }
-#endif /* REG_RUBY_M17N */
+ return ONIGERR_UNEXPECTED_BYTECODE;
}
-extern UChar*
-regex_get_right_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s)
-{
- UChar* p = regex_get_left_adjust_char_head(code, start, s);
-
- if (p < s) {
- p += mblen(code, *p);
- }
- return p;
-}
static UChar*
-get_right_adjust_char_head_with_prev(RegCharEncoding code,
- UChar* start, UChar* s, UChar** prev)
-{
- UChar* p = regex_get_left_adjust_char_head(code, start, s);
-
- if (p < s) {
- if (prev) *prev = p;
- p += mblen(code, *p);
- }
- else {
- if (prev) *prev = (UChar* )NULL; /* Sorry */
- }
- return p;
-}
-
-extern UChar*
-regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s)
-{
- if (s <= start)
- return (UChar* )NULL;
-
- return regex_get_left_adjust_char_head(code, start, s - 1);
-}
-
-static UChar*
-step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n)
-{
- while (IS_NOT_NULL(s) && n-- > 0) {
- if (s <= start)
- return (UChar* )NULL;
-
- s = regex_get_left_adjust_char_head(code, start, s - 1);
- }
- return s;
-}
-
-static UChar*
-slow_search(RegCharEncoding code, UChar* target, UChar* target_end,
+slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
UChar* text, UChar* text_end, UChar* text_range)
{
UChar *t, *p, *s, *end;
@@ -2089,14 +2372,15 @@ slow_search(RegCharEncoding code, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s += mblen(code, *s);
+ s += enc_len(enc, *s);
}
return (UChar* )NULL;
}
+#if 0
static int
-str_trans_match_after_head_byte(RegCharEncoding code,
+str_trans_match_after_head_byte(OnigEncoding enc,
int len, UChar* t, UChar* tend, UChar* p)
{
while (--len > 0) {
@@ -2105,23 +2389,19 @@ str_trans_match_after_head_byte(RegCharEncoding code,
}
if (len == 0) {
+ int lowlen;
+ UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+
while (t < tend) {
- len = mblen(code, *p);
- if (len == 1) {
- if (*t != TOLOWER(code, *p))
- break;
- p++;
- t++;
- }
- else {
- if (*t != *p++) break;
- t++;
- while (--len > 0) {
- if (*t != *p) break;
- t++; p++;
- }
- if (len > 0) break;
- }
+ len = enc_len(enc, *p);
+ lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
+ q = lowbuf;
+ while (lowlen > 0) {
+ if (*t++ != *q++) break;
+ lowlen--;
+ }
+ if (lowlen > 0) break;
+ p += len;
}
if (t == tend)
return 1;
@@ -2129,14 +2409,36 @@ str_trans_match_after_head_byte(RegCharEncoding code,
return 0;
}
+#endif
+/* str_lower_case_match: returns 1 when every byte of the target [t, tend) equals the lower-cased text read from p, and 0 at the first mismatch. */
+static int
+str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p)
+{
+ int len, lowlen;
+ UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+/* one text character per iteration: lower-case it into lowbuf, then compare byte by byte */
+ while (t < tend) {
+ len = enc_len(enc, *p);
+ lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf); /* used below as the number of bytes to compare from lowbuf */
+ q = lowbuf;
+ while (lowlen > 0) { /* NOTE(review): t is not re-checked against tend in this loop and p has no end bound; presumably callers guarantee enough bytes -- confirm */
+ if (*t++ != *q++) return 0;
+ lowlen--;
+ }
+ p += len; /* advance the text by the length enc_len reported for the byte at p */
+ }
+/* the whole target was consumed without a mismatch */
+ return 1;
+}
static UChar*
-slow_search_ic(RegCharEncoding code,
+slow_search_ic(OnigEncoding enc, /* forward scan of text for [target, target_end); target bytes are compared with the lower-cased form of each text character, so target is presumably already lower-case -- confirm */
UChar* target, UChar* target_end,
UChar* text, UChar* text_end, UChar* text_range)
{
- int len;
+ int len, lowlen;
UChar *t, *p, *s, *end;
+ UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; /* scratch buffer for one lower-cased character */
end = text_end - (target_end - target) + 1;
if (end > text_range)
@@ -2145,13 +2447,21 @@ slow_search_ic(RegCharEncoding code,
s = text;
while (s < end) {
- len = mblen(code, *s);
- if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) {
- p = s + 1;
+ len = enc_len(enc, *s);
+ lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); /* used below as the byte count of the lower-cased character in lowbuf */
+ if (*target == *lowbuf) { /* cheap first-byte test before the full comparison */
+ p = lowbuf + 1;
t = target + 1;
- if (str_trans_match_after_head_byte(code, len, t, target_end, p))
- return s;
+ while (--lowlen > 0) { /* compare the remaining bytes of this lower-cased character */
+ if (*p != *t) break;
+ p++; *t++; /* NOTE(review): '*t++' dereferences and discards the value; the only effect is t++ */
+ }
+ if (lowlen == 0) { /* the loop above finished without a break: the head character matched */
+ if (str_lower_case_match(enc, t, target_end, s + len)) /* check the rest of target against the text after this character */
+ return s;
+ }
}
+
+ s += len;
}
@@ -2159,7 +2469,7 @@ slow_search_ic(RegCharEncoding code,
}
static UChar*
-slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end,
+slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start)
{
UChar *t, *p, *s;
@@ -2168,7 +2478,7 @@ slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end,
if (s > text_start)
s = text_start;
else
- s = regex_get_left_adjust_char_head(code, adjust_text, s);
+ s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
if (*s == *target) {
@@ -2182,36 +2492,45 @@ slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s = regex_get_prev_char_head(code, adjust_text, s);
+ s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-slow_search_backward_ic(RegCharEncoding code,
+slow_search_backward_ic(OnigEncoding enc, /* backward scan for [target, target_end), walking s down towards text; target bytes are compared with the lower-cased form of each text character, so target is presumably already lower-case -- confirm */
UChar* target,UChar* target_end,
UChar* text, UChar* adjust_text,
UChar* text_end, UChar* text_start)
{
- int len;
+ int len, lowlen;
UChar *t, *p, *s;
+ UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; /* scratch buffer for one lower-cased character */
s = text_end - (target_end - target);
if (s > text_start)
s = text_start;
else
- s = regex_get_left_adjust_char_head(code, adjust_text, s);
+ s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- len = mblen(code, *s);
- if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) {
- p = s + 1;
+ len = enc_len(enc, *s);
+ lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); /* used below as the byte count of the lower-cased character in lowbuf */
+ if (*target == *lowbuf) { /* cheap first-byte test before the full comparison */
+ p = lowbuf + 1;
t = target + 1;
- if (str_trans_match_after_head_byte(code, len, t, target_end, p))
- return s;
+ while (--lowlen > 0) { /* compare the remaining bytes of this lower-cased character */
+ if (*p != *t) break;
+ p++; *t++; /* NOTE(review): '*t++' dereferences and discards the value; the only effect is t++ */
+ }
+ if (lowlen == 0) { /* the loop above finished without a break: the head character matched */
+ if (str_lower_case_match(enc, t, target_end, s + len)) /* check the rest of target against the text after this character */
+ return s;
+ }
}
- s = regex_get_prev_char_head(code, adjust_text, s);
+
+ s = onigenc_get_prev_char_head(enc, adjust_text, s); /* presumably yields NULL once s reaches adjust_text, which ends the loop -- confirm against the encoding helper */
}
return (UChar* )NULL;
@@ -2232,7 +2551,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
tail = target_end - 1;
s = text;
while ((s - text) < target_end - target) {
- s += mblen(reg->enc, *s);
+ s += enc_len(reg->enc, *s);
}
s--; /* set to text check tail position. */
@@ -2249,7 +2568,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
p++;
t = p;
while ((p - t) < skip) {
- p += mblen(reg->enc, *p);
+ p += enc_len(reg->enc, *p);
}
s += (p - t);
}
@@ -2267,7 +2586,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
p++;
t = p;
while ((p - t) < skip) {
- p += mblen(reg->enc, *p);
+ p += enc_len(reg->enc, *p);
}
s += (p - t);
}
@@ -2314,23 +2633,26 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
}
static int
-set_bm_backward_skip(UChar* s, UChar* end, RegCharEncoding enc,
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
int ignore_case, int** skip)
{
int i, len;
+ UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * REG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return REGERR_MEMORY;
+ *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+ if (IS_NULL(*skip)) return ONIGERR_MEMORY;
}
len = end - s;
- for (i = 0; i < REG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
(*skip)[i] = len;
if (ignore_case) {
- for (i = len - 1; i > 0; i--)
- (*skip)[TOLOWER(enc, s[i])] = i;
+ for (i = len - 1; i > 0; i--) {
+ ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
+ (*skip)[*lowbuf] = i;
+ }
}
else {
for (i = len - 1; i > 0; i--)
@@ -2349,7 +2671,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
if (text_start < s)
s = text_start;
else
- s = regex_get_left_adjust_char_head(reg->enc, adjust_text, s);
+ s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
while (s >= text) {
p = s;
@@ -2361,27 +2683,27 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
return s;
s -= reg->int_map_backward[*s];
- s = regex_get_left_adjust_char_head(reg->enc, adjust_text, s);
+ s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-map_search(RegCharEncoding code, UChar map[], UChar* text, UChar* text_range)
+map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range) /* returns the first visited s in [text, text_range) with map[*s] nonzero, or NULL when none is found */
{
UChar *s = text;
while (s < text_range) {
if (map[*s]) return s;
- s += mblen(code, *s);
+ s += enc_len(enc, *s); /* only the byte at each step position is tested; step by the length enc_len reports for it */
}
return (UChar* )NULL;
}
static UChar*
-map_search_backward(RegCharEncoding code, UChar map[],
+map_search_backward(OnigEncoding enc, UChar map[],
UChar* text, UChar* adjust_text, UChar* text_start)
{
UChar *s = text_start;
@@ -2389,14 +2711,14 @@ map_search_backward(RegCharEncoding code, UChar map[],
while (s >= text) {
if (map[*s]) return s;
- s = regex_get_prev_char_head(code, adjust_text, s);
+ s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
extern int
-regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, RegRegion* region,
- RegOptionType option)
+onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
+ OnigOptionType option)
{
int r;
UChar *prev;
@@ -2404,13 +2726,18 @@ regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, RegRegion* region,
MATCH_ARG_INIT(msa, option, region, at);
- if (region && !IS_POSIX_REGION(option))
- r = regex_region_resize(region, reg->num_mem + 1);
+ if (region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ r = onig_region_resize(region, reg->num_mem + 1);
+ }
else
r = 0;
if (r == 0) {
- prev = regex_get_prev_char_head(reg->enc, str, at);
+ prev = onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end, at, prev, &msa);
}
MATCH_ARG_FREE(msa);
@@ -2423,40 +2750,40 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
{
UChar *p, *pprev = (UChar* )NULL;
-#ifdef REG_DEBUG_SEARCH
+#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
(int )str, (int )end, (int )s, (int )range);
#endif
p = s;
if (reg->dmin > 0) {
- if (IS_SINGLEBYTE_CODE(reg->enc)) {
+ if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
p += reg->dmin;
}
else {
UChar *q = p + reg->dmin;
- while (p < q) p += mblen(reg->enc, *p);
+ while (p < q) p += enc_len(reg->enc, *p);
}
}
retry:
switch (reg->optimize) {
- case REG_OPTIMIZE_EXACT:
+ case ONIG_OPTIMIZE_EXACT:
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
- case REG_OPTIMIZE_EXACT_IC:
+ case ONIG_OPTIMIZE_EXACT_IC:
p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
- case REG_OPTIMIZE_EXACT_BM:
+ case ONIG_OPTIMIZE_EXACT_BM:
p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case REG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case REG_OPTIMIZE_MAP:
+ case ONIG_OPTIMIZE_MAP:
p = map_search(reg->enc, reg->map, p, range);
break;
}
@@ -2465,7 +2792,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (p - reg->dmin < s) {
retry_gate:
pprev = p;
- p += mblen(reg->enc, *p);
+ p += enc_len(reg->enc, *p);
goto retry;
}
@@ -2475,19 +2802,21 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
- prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
- if (!IS_NEWLINE(*prev))
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (!ONIG_IS_NEWLINE(*prev))
goto retry_gate;
}
break;
case ANCHOR_END_LINE:
if (ON_STR_END(p)) {
- prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
- if (prev && IS_NEWLINE(*prev))
+ prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
+ if (prev && ONIG_IS_NEWLINE(*prev))
goto retry_gate;
}
- else if (!IS_NEWLINE(*p))
+ else if (!ONIG_IS_NEWLINE(*p))
goto retry_gate;
break;
}
@@ -2497,33 +2826,33 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
*low = p;
if (low_prev) {
if (*low > s)
- *low_prev = regex_get_prev_char_head(reg->enc, s, p);
+ *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
else
- *low_prev = regex_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), p);
}
}
else {
- if (reg->dmax != INFINITE_DISTANCE) {
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
*low = p - reg->dmax;
if (*low > s) {
- *low = get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, low_prev);
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low, low_prev);
if (low_prev && IS_NULL(*low_prev))
- *low_prev = regex_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : s), *low);
}
else {
if (low_prev)
- *low_prev = regex_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), *low);
}
}
}
/* no needs to adjust *high, *high is used as range check only */
*high = p - reg->dmin;
-#ifdef REG_DEBUG_SEARCH
+#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
(int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
@@ -2534,7 +2863,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
return 0; /* fail */
}
-static int set_bm_backward_skip P_((UChar* s, UChar* end, RegCharEncoding enc,
+static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
int ignore_case, int** skip));
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
@@ -2551,19 +2880,19 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
retry:
switch (reg->optimize) {
- case REG_OPTIMIZE_EXACT:
+ case ONIG_OPTIMIZE_EXACT:
exact_method:
p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case REG_OPTIMIZE_EXACT_IC:
+ case ONIG_OPTIMIZE_EXACT_IC:
p = slow_search_backward_ic(reg->enc, reg->exact,
reg->exact_end, range, adjrange, end, p);
break;
- case REG_OPTIMIZE_EXACT_BM:
- case REG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case ONIG_OPTIMIZE_EXACT_BM:
+ case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
if (IS_NULL(reg->int_map_backward)) {
if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
goto exact_method;
@@ -2576,7 +2905,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
end, p);
break;
- case REG_OPTIMIZE_MAP:
+ case ONIG_OPTIMIZE_MAP:
p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
break;
}
@@ -2588,8 +2917,8 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
- prev = regex_get_prev_char_head(reg->enc, adjrange, p);
- if (!IS_NEWLINE(*prev)) {
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (!ONIG_IS_NEWLINE(*prev)) {
p = prev;
goto retry;
}
@@ -2598,15 +2927,15 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
case ANCHOR_END_LINE:
if (ON_STR_END(p)) {
- prev = regex_get_prev_char_head(reg->enc, adjrange, p);
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(prev)) goto fail;
- if (IS_NEWLINE(*prev)) {
+ if (ONIG_IS_NEWLINE(*prev)) {
p = prev;
goto retry;
}
}
- else if (!IS_NEWLINE(*p)) {
- p = regex_get_prev_char_head(reg->enc, adjrange, p);
+ else if (!ONIG_IS_NEWLINE(*p)) {
+ p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;
}
@@ -2615,13 +2944,13 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
/* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != INFINITE_DISTANCE) {
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
*low = p - reg->dmax;
*high = p - reg->dmin;
- *high = regex_get_right_adjust_char_head(reg->enc, adjrange, *high);
+ *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
}
-#ifdef REG_DEBUG_SEARCH
+#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
(int )(*low - str), (int )(*high - str));
#endif
@@ -2629,7 +2958,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
fail:
-#ifdef REG_DEBUG_SEARCH
+#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "backward_search_range: fail.\n");
#endif
return 0; /* fail */
@@ -2637,37 +2966,41 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
extern int
-regex_search(regex_t* reg, UChar* str, UChar* end,
- UChar* start, UChar* range, RegRegion* region, RegOptionType option)
+onig_search(regex_t* reg, UChar* str, UChar* end,
+ UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)
{
int r;
UChar *s, *prev;
MatchArg msa;
- if (REG_STATE(reg) == REG_STATE_NORMAL) {
+ if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
reg->state++; /* increment as search counter */
if (IS_NOT_NULL(reg->chain)) {
- regex_chain_reduce(reg);
+ onig_chain_reduce(reg);
reg->state++;
}
}
else {
int n = 0;
- while (REG_STATE(reg) < REG_STATE_NORMAL) {
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
- return REGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
reg->state++; /* increment as search counter */
}
-#ifdef REG_DEBUG_SEARCH
- fprintf(stderr, "regex_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
(int )str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
- if (region && !IS_POSIX_REGION(option)) {
- r = regex_region_resize(region, reg->num_mem + 1);
+ if (region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ r = onig_region_resize(region, reg->num_mem + 1);
if (r) goto finish_no_msa;
}
@@ -2675,7 +3008,7 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
#define MATCH_AND_RETURN_CHECK \
r = match_at(reg, str, end, s, prev, &msa);\
- if (r != REG_MISMATCH) {\
+ if (r != ONIG_MISMATCH) {\
if (r >= 0) goto match;\
goto finish; /* error */ \
}
@@ -2718,9 +3051,9 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
if (semi_end - start > reg->anchor_dmax) {
start = semi_end - reg->anchor_dmax;
if (start < end)
- start = regex_get_right_adjust_char_head(reg->enc, str, start);
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
else { /* match with empty at end */
- start = regex_get_prev_char_head(reg->enc, str, end);
+ start = onigenc_get_prev_char_head(reg->enc, str, end);
}
}
if (semi_end - (range - 1) < reg->anchor_dmin) {
@@ -2735,13 +3068,13 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
}
if (semi_end - start < reg->anchor_dmin) {
start = semi_end - reg->anchor_dmin;
- start = regex_get_left_adjust_char_head(reg->enc, str, start);
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
if (range > start) goto mismatch_no_msa;
}
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
- if (IS_NEWLINE(end[-1])) {
+ if (ONIG_IS_NEWLINE(end[-1])) {
semi_end = end - 1;
if (semi_end > str && start <= semi_end) {
goto end_buf;
@@ -2759,8 +3092,8 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
else if (str == end) { /* empty string */
static UChar* address_for_empty_string = "";
-#ifdef REG_DEBUG_SEARCH
- fprintf(stderr, "regex_search: empty string.\n");
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search: empty string.\n");
#endif
if (reg->threshold_len == 0) {
@@ -2774,8 +3107,8 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
goto mismatch_no_msa;
}
-#ifdef REG_DEBUG_SEARCH
- fprintf(stderr, "regex_search(apply anchor): end: %d, start: %d, range: %d\n",
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
@@ -2784,23 +3117,23 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
s = start;
if (range > start) { /* forward search */
if (s > str)
- prev = regex_get_prev_char_head(reg->enc, str, s);
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
else
prev = (UChar* )NULL;
- if (reg->optimize != REG_OPTIMIZE_NONE) {
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
sch_range = range;
if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_DISTANCE)
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
sch_range = end;
else {
sch_range += reg->dmax;
if (sch_range > end) sch_range = end;
}
}
- if (reg->dmax != INFINITE_DISTANCE &&
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
(end - start) >= reg->threshold_len) {
do {
if (! forward_search_range(reg, str, end, s, sch_range,
@@ -2812,13 +3145,13 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += mblen(reg->enc, *s);
+ s += enc_len(reg->enc, *s);
}
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
if (IS_NOT_NULL(prev)) {
- while (!IS_NEWLINE(*prev) && s < range) {
+ while (!ONIG_IS_NEWLINE(*prev) && s < range) {
prev = s;
- s += mblen(reg->enc, *s);
+ s += enc_len(reg->enc, *s);
}
}
}
@@ -2835,15 +3168,15 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
do {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += mblen(reg->enc, *s);
+ s += enc_len(reg->enc, *s);
} while (s <= range); /* exec s == range, because empty match with /$/. */
}
else { /* backward search */
- if (reg->optimize != REG_OPTIMIZE_NONE) {
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
- adjrange = regex_get_left_adjust_char_head(reg->enc, str, range);
- if (reg->dmax != INFINITE_DISTANCE &&
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
(end - range) >= reg->threshold_len) {
do {
sch_start = s + reg->dmax;
@@ -2856,7 +3189,7 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
s = high;
while (s >= low) {
- prev = regex_get_prev_char_head(reg->enc, str, s);
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK;
s = prev;
}
@@ -2868,14 +3201,14 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
sch_start = s;
if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_DISTANCE)
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
sch_start = end;
else {
sch_start += reg->dmax;
if (sch_start > end) sch_start = end;
else
- sch_start = regex_get_left_adjust_char_head(reg->enc, start,
- sch_start);
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start);
}
}
if (backward_search_range(reg, str, end, sch_start, range, adjrange,
@@ -2884,14 +3217,14 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
}
do {
- prev = regex_get_prev_char_head(reg->enc, str, s);
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK;
s = prev;
} while (s >= range);
}
mismatch:
- r = REG_MISMATCH;
+ r = ONIG_MISMATCH;
finish:
MATCH_ARG_FREE(msa);
@@ -2899,22 +3232,27 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
- if (IS_FIND_NOT_EMPTY(reg->options) && region && !IS_POSIX_REGION(option))
- regex_region_clear(region);
+ if (IS_FIND_NOT_EMPTY(reg->options) && region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ onig_region_clear(region);
+ }
-#ifdef REG_DEBUG
- if (r != REG_MISMATCH)
- fprintf(stderr, "regex_search: error %d\n", r);
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
#endif
return r;
mismatch_no_msa:
- r = REG_MISMATCH;
+ r = ONIG_MISMATCH;
finish_no_msa:
reg->state--; /* decrement as search counter */
-#ifdef REG_DEBUG
- if (r != REG_MISMATCH)
- fprintf(stderr, "regex_search: error %d\n", r);
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
#endif
return r;
@@ -2924,8 +3262,26 @@ regex_search(regex_t* reg, UChar* str, UChar* end,
return s - str;
}
+extern OnigEncoding
+onig_get_encoding(regex_t* reg)
+{
+ return reg->enc;
+}
+
+extern OnigOptionType
+onig_get_options(regex_t* reg)
+{
+ return reg->options;
+}
+
+extern OnigSyntaxType*
+onig_get_syntax(regex_t* reg)
+{
+ return reg->syntax;
+}
+
extern const char*
-regex_version(void)
+onig_version(void)
{
#define MSTR(a) # a
diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c
index 7b95e26f76..9c6a2161c2 100644
--- a/ext/mbstring/oniguruma/reggnu.c
+++ b/ext/mbstring/oniguruma/reggnu.c
@@ -2,13 +2,17 @@
reggnu.c - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
+#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
+#include "oniggnu.h"
+#endif
+
#if defined(RUBY_PLATFORM) || defined(RUBY)
-#ifndef REG_RUBY_M17N
+#ifndef ONIG_RUBY_M17N
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
#endif
#endif
@@ -18,25 +22,25 @@
#endif
extern void
-re_free_registers(RegRegion* r)
+re_free_registers(OnigRegion* r)
{
/* 0: don't free self */
- regex_region_free(r, 0);
+ onig_region_free(r, 0);
}
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
int startpos, int range)
{
- if (startpos > 0 && mbmaxlen(reg->enc) != 1 && startpos < size) {
+ if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
UChar *p;
UChar *s = (UChar* )string + startpos;
if (range > 0) {
- p = regex_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
+ p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
}
else {
- p = regex_get_left_adjust_char_head(reg->enc, (UChar* )string, s);
+ p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
}
return p - (UChar* )string;
}
@@ -48,29 +52,30 @@ extern int
re_match(regex_t* reg, const char* str, int size, int pos,
struct re_registers* regs)
{
- return regex_match(reg, (UChar* )str, (UChar* )(str + size),
- (UChar* )(str + pos), regs, REG_OPTION_NONE);
+ return onig_match(reg, (UChar* )str, (UChar* )(str + size),
+ (UChar* )(str + pos), regs, ONIG_OPTION_NONE);
}
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
struct re_registers* regs)
{
- return regex_search(bufp, (UChar* )string, (UChar* )(string + size),
- (UChar* )(string + startpos),
- (UChar* )(string + startpos + range), regs, REG_OPTION_NONE);
+ return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
+ (UChar* )(string + startpos),
+ (UChar* )(string + startpos + range),
+ regs, ONIG_OPTION_NONE);
}
extern int
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
- RegErrorInfo einfo;
+ OnigErrorInfo einfo;
- r = regex_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
+ r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
if (r != 0) {
if (IS_NOT_NULL(ebuf))
- (void )regex_error_code_to_str((UChar* )ebuf, r, &einfo);
+ (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
@@ -80,19 +85,19 @@ extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
- RegErrorInfo einfo;
- RegCharEncoding enc;
+ OnigErrorInfo einfo;
+ OnigEncoding enc;
/* I think encoding and options should be arguments of this function.
But this is adapted to present re.c. (2002/11/29)
*/
- enc = RegDefaultCharEncoding;
+ enc = OnigEncDefaultCharEncoding;
- r = regex_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
- reg->options, enc, RegDefaultSyntax, &einfo);
+ r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
+ reg->options, enc, OnigDefaultSyntax, &einfo);
if (r != 0) {
if (IS_NOT_NULL(ebuf))
- (void )regex_error_code_to_str((UChar* )ebuf, r, &einfo);
+ (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
}
@@ -100,23 +105,20 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
extern void
re_free_pattern(regex_t* reg)
{
- regex_free(reg);
+ onig_free(reg);
}
extern int
re_alloc_pattern(regex_t** reg)
{
- if (RegDefaultCharEncoding == REGCODE_UNDEF)
- return REGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
-
- return regex_alloc_init(reg, REG_OPTION_DEFAULT, RegDefaultCharEncoding,
- RegDefaultSyntax);
+ return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
+ OnigDefaultSyntax);
}
extern void
re_set_casetable(const char* table)
{
- regex_set_default_trans_table((UChar* )table);
+ onigenc_set_default_caseconv_table((UChar* )table);
}
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
@@ -167,7 +169,7 @@ static const unsigned char mbctab_sjis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -200,16 +202,39 @@ const unsigned char *re_mbctab = mbctab_ascii;
#endif
extern void
-#ifdef REG_RUBY_M17N
-re_mbcinit(RegCharEncoding enc)
+#ifdef ONIG_RUBY_M17N
+re_mbcinit(OnigEncoding enc)
#else
re_mbcinit(int mb_code)
#endif
{
-#ifdef REG_RUBY_M17N
- RegDefaultCharEncoding = enc;
+#ifdef ONIG_RUBY_M17N
+
+ onigenc_set_default_encoding(enc);
+
#else
- RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code];
+
+ OnigEncoding enc;
+
+ switch (mb_code) {
+ case MBCTYPE_ASCII:
+ enc = ONIG_ENCODING_ASCII;
+ break;
+ case MBCTYPE_EUC:
+ enc = ONIG_ENCODING_EUC_JP;
+ break;
+ case MBCTYPE_SJIS:
+ enc = ONIG_ENCODING_SJIS;
+ break;
+ case MBCTYPE_UTF8:
+ enc = ONIG_ENCODING_UTF8;
+ break;
+ default:
+ return ;
+ break;
+ }
+
+ onigenc_set_default_encoding(enc);
#endif
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h
index d646dd11f0..c01a73c546 100644
--- a/ext/mbstring/oniguruma/regint.h
+++ b/ext/mbstring/oniguruma/regint.h
@@ -2,56 +2,62 @@
regint.h - Oniguruma (regular expression library)
- Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
/* for debug */
-/* #define REG_DEBUG_PARSE_TREE */
-/* #define REG_DEBUG_COMPILE */
-/* #define REG_DEBUG_SEARCH */
-/* #define REG_DEBUG_MATCH */
-/* #define REG_DONT_OPTIMIZE */
+/* #define ONIG_DEBUG_PARSE_TREE */
+/* #define ONIG_DEBUG_COMPILE */
+/* #define ONIG_DEBUG_SEARCH */
+/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
-/* #define REG_DEBUG_STATISTICS */
+/* #define ONIG_DEBUG_STATISTICS */
-#if defined(REG_DEBUG_PARSE_TREE) || defined(REG_DEBUG_MATCH) || \
- defined(REG_DEBUG_COMPILE) || defined(REG_DEBUG_STATISTICS)
-#ifndef REG_DEBUG
-#define REG_DEBUG
+#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
+ defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
+#ifndef ONIG_DEBUG
+#define ONIG_DEBUG
#endif
#endif
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
(defined(__ppc__) && defined(__APPLE__)) || \
+ defined(__x86_64) || defined(__x86_64__) || \
defined(__mc68020__)
-#define UNALIGNED_WORD_ACCESS
+#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
-#define USE_NAMED_SUBEXP
+/* spec. config */
+#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
+#define USE_FOLD_MATCH /* ess-tsett etc... */
+#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
+#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
+#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* internal config */
+#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
-#define USE_RECYCLE_NODE
-#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
-/* #define USE_SBMB_CLASS */
#define INIT_MATCH_STACK_SIZE 160
-#define MATCH_STACK_LIMIT_SIZE 200000
+#define MATCH_STACK_LIMIT_SIZE 500000
/* interface to external system */
#ifdef NOT_RUBY /* gived from Makefile */
#include "config.h"
+#define USE_VARIABLE_META_CHARS
#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
-#define DEFAULT_TRANSTABLE_EXIST 1
-#define THREAD_ATOMIC_START /* depend on thread system */
-#define THREAD_ATOMIC_END /* depend on thread system */
-#define THREAD_PASS /* depend on thread system */
+#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+#define THREAD_ATOMIC_START /* depend on thread system */
+#define THREAD_ATOMIC_END /* depend on thread system */
+#define THREAD_PASS /* depend on thread system */
#define xmalloc malloc
#define xrealloc realloc
#define xfree free
@@ -59,12 +65,11 @@
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
-#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
-#define THREAD_ATOMIC_START DEFER_INTS
-#define THREAD_ATOMIC_END ENABLE_INTS
-#define THREAD_PASS /* I want to use rb_thread_pass() */
-#define WARNING rb_warn
-#define VERB_WARNING rb_warning
+#define THREAD_ATOMIC_START DEFER_INTS
+#define THREAD_ATOMIC_END ENABLE_INTS
+#define THREAD_PASS rb_thread_schedule()
+#define DEFAULT_WARN_FUNCTION rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION rb_warning
#if defined(RUBY_VERSION_MAJOR)
#if RUBY_VERSION_MAJOR > 1 || \
@@ -74,6 +79,8 @@
#endif
#endif
+#define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION(s,f,n) \
+ rb_define_global_function(s, f, n)
#endif /* else NOT_RUBY */
#define THREAD_PASS_LIMIT_COUNT 10
@@ -82,7 +89,9 @@
#define xmemmove memmove
#if defined(_WIN32) && !defined(__CYGWIN__)
#define xalloca _alloca
+#ifdef NOT_RUBY
#define vsnprintf _vsnprintf
+#endif
#else
#define xalloca alloca
#endif
@@ -104,15 +113,12 @@
#include <ctype.h>
#include <sys/types.h>
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
-#ifdef NOT_RUBY
-# include "oniguruma.h"
-#else
-# include "regex.h"
-#endif
+#include "regenc.h"
+#include "oniguruma.h"
#ifdef MIN
#undef MIN
@@ -123,17 +129,24 @@
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))
-#ifndef UNALIGNED_WORD_ACCESS
+#define IS_NULL(p) (((void*)(p)) == (void*)0)
+#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
+#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
+#define NULL_UCHARP ((UChar* )0)
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
- (pad_size) = WORD_ALIGNMENT_SIZE - ((int )(addr) % WORD_ALIGNMENT_SIZE);\
+ (pad_size) = WORD_ALIGNMENT_SIZE \
+ - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
- (addr) -= ((int )(addr) % WORD_ALIGNMENT_SIZE);\
+ (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
@@ -216,7 +229,7 @@
#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
-#endif /* UNALIGNED_WORD_ACCESS */
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
@@ -224,12 +237,12 @@
#define STACK_POP_LEVEL_ALL 2
/* optimize flags */
-#define REG_OPTIMIZE_NONE 0
-#define REG_OPTIMIZE_EXACT 1 /* Slow Search */
-#define REG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
-#define REG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
-#define REG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
-#define REG_OPTIMIZE_MAP 5 /* char map */
+#define ONIG_OPTIMIZE_NONE 0
+#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
+#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
+#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
+#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
+#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
typedef unsigned int BitStatusType;
@@ -255,71 +268,32 @@ typedef unsigned int BitStatusType;
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
-typedef unsigned int WCINT;
-
-#define SIZE_WCINT sizeof(WCINT)
-#define GET_WCINT(wc,p) (wc) = *((WCINT* )(p))
-
-#define INFINITE_DISTANCE ~((RegDistance )0)
-
-#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
-# define IS_ASCII(c) 1
-#else
-# define IS_ASCII(c) isascii(c)
-#endif
-
-#ifdef isblank
-# define IS_BLANK(c) (IS_ASCII(c) && isblank(c))
-#else
-# define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
-#endif
-#ifdef isgraph
-# define IS_GRAPH(c) (IS_ASCII(c) && isgraph(c))
-#else
-# define IS_GRAPH(c) (IS_ASCII(c) && isprint(c) && !isspace(c))
-#endif
-
-#define IS_PRINT(c) (isprint(c) && IS_ASCII(c))
-#define IS_ALNUM(c) (isalnum(c) && IS_ASCII(c))
-#define IS_ALPHA(c) (isalpha(c) && IS_ASCII(c))
-#define IS_LOWER(c) (islower(c) && IS_ASCII(c))
-#define IS_UPPER(c) (isupper(c) && IS_ASCII(c))
-#define IS_CNTRL(c) (iscntrl(c) && IS_ASCII(c))
-#define IS_PUNCT(c) (ispunct(c) && IS_ASCII(c))
-#define IS_SPACE(c) (isspace(c) && IS_ASCII(c))
-#define IS_DIGIT(c) (isdigit(c) && IS_ASCII(c))
-#define IS_XDIGIT(c) (isxdigit(c) && IS_ASCII(c))
-#define IS_ODIGIT(c) (IS_DIGIT(c) && (c) < '8')
-
-#define DIGITVAL(c) ((c) - '0')
-#define ODIGITVAL(c) DIGITVAL(c)
-#define XDIGITVAL(c) \
- (IS_DIGIT(c) ? DIGITVAL(c) : (IS_UPPER(c) ? (c) - 'A' + 10 : (c) - 'a' + 10))
-
-#define IS_SINGLELINE(option) ((option) & REG_OPTION_SINGLELINE)
-#define IS_MULTILINE(option) ((option) & REG_OPTION_MULTILINE)
-#define IS_IGNORECASE(option) ((option) & REG_OPTION_IGNORECASE)
-#define IS_EXTEND(option) ((option) & REG_OPTION_EXTEND)
-#define IS_FIND_LONGEST(option) ((option) & REG_OPTION_FIND_LONGEST)
-#define IS_FIND_NOT_EMPTY(option) ((option) & REG_OPTION_FIND_NOT_EMPTY)
+#define DIGITVAL(code) ((code) - '0')
+#define ODIGITVAL(code) DIGITVAL(code)
+#define XDIGITVAL(enc,code) \
+ (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
+ : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
+
+#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
+#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
+#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
+#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
+#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
+#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
#define IS_FIND_CONDITION(option) ((option) & \
- (REG_OPTION_FIND_LONGEST | REG_OPTION_FIND_NOT_EMPTY))
-#define IS_NOTBOL(option) ((option) & REG_OPTION_NOTBOL)
-#define IS_NOTEOL(option) ((option) & REG_OPTION_NOTEOL)
-#define IS_POSIX_REGION(option) ((option) & REG_OPTION_POSIX_REGION)
+ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
+#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
+#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
+#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
-#ifdef NEWLINE
-#undef NEWLINE
-#endif
-#define NEWLINE '\n'
-#define IS_NULL(p) (((void*)(p)) == (void*)0)
-#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
-#define IS_NEWLINE(c) ((c) == NEWLINE)
-#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
-#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
+/* OP_SET_OPTION is required for these options.
+#define IS_DYNAMIC_OPTION(option) \
+ (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
+*/
+/* ignore-case and multibyte status are included in compiled code. */
+#define IS_DYNAMIC_OPTION(option) 0
-#define NULL_UCHARP ((UChar* )0)
/* bitset */
#define BITS_PER_BYTE 8
@@ -327,7 +301,7 @@ typedef unsigned int WCINT;
#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int Bits;
#else
typedef unsigned char Bits;
@@ -357,18 +331,18 @@ typedef struct _BBuf {
unsigned int alloc;
} BBuf;
-#define BBUF_INIT(buf,size) regex_bbuf_init((BBuf* )(buf), (size))
+#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
#define BBUF_SIZE_INC(buf,inc) do{\
(buf)->alloc += (inc);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
- if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_EXPAND(buf,low) do{\
do { (buf)->alloc *= 2; } while ((buf)->alloc < low);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
- if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_ENSURE_SIZE(buf,size) do{\
@@ -376,7 +350,7 @@ typedef struct _BBuf {
while (new_alloc < (size)) { new_alloc *= 2; }\
if ((buf)->alloc != new_alloc) {\
(buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
- if (IS_NULL((buf)->p)) return(REGERR_MEMORY);\
+ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
(buf)->alloc = new_alloc;\
}\
} while (0)
@@ -430,112 +404,6 @@ typedef struct _BBuf {
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
-extern UChar* DefaultTransTable;
-#define TOLOWER(enc,c) (DefaultTransTable[c])
-
-/* methods for support multi-byte code, */
-#define ismb(code,c) (mblen((code),(c)) != 1)
-#define MB2WC(p,end,code) mb2wc((p),(end),(code))
-#define MBBACK(code,start,s,n) step_backward_char((code),(start),(s),(n))
-
-#ifdef REG_RUBY_M17N
-
-#define MB2WC_AVAILABLE(enc) 1
-#define WC2MB_FIRST(enc, wc) m17n_firstbyte((enc),(wc))
-
-#define mbmaxlen(enc) m17n_mbmaxlen(enc)
-#define mblen(enc,c) m17n_mbclen(enc,c)
-#define mbmaxlen_dist(enc) \
- (mbmaxlen(enc) > 0 ? mbmaxlen(enc) : INFINITE_DISTANCE)
-
-#define IS_SINGLEBYTE_CODE(enc) (m17n_mbmaxlen(enc) == 1)
-/* #define IS_INDEPENDENT_TRAIL(enc) m17n_independent_trail(enc) */
-#define IS_INDEPENDENT_TRAIL(enc) IS_SINGLEBYTE_CODE(enc)
-
-#define IS_CODE_ASCII(enc,c) IS_ASCII(c)
-#define IS_CODE_GRAPH(enc,c) IS_GRAPH(c)
-#define IS_CODE_PRINT(enc,c) m17n_isprint(enc,c)
-#define IS_CODE_ALNUM(enc,c) m17n_isalnum(enc,c)
-#define IS_CODE_ALPHA(enc,c) m17n_isalpha(enc,c)
-#define IS_CODE_LOWER(enc,c) m17n_islower(enc,c)
-#define IS_CODE_UPPER(enc,c) m17n_isupper(enc,c)
-#define IS_CODE_CNTRL(enc,c) m17n_iscntrl(enc,c)
-#define IS_CODE_PUNCT(enc,c) m17n_ispunct(enc,c)
-#define IS_CODE_SPACE(enc,c) m17n_isspace(enc,c)
-#define IS_CODE_BLANK(enc,c) IS_BLANK(c)
-#define IS_CODE_DIGIT(enc,c) m17n_isdigit(enc,c)
-#define IS_CODE_XDIGIT(enc,c) m17n_isxdigit(enc,c)
-
-#define IS_CODE_WORD(enc,c) m17n_iswchar(enc,c)
-#define ISNOT_CODE_WORD(enc,c) (!m17n_iswchar(enc,c))
-
-#define IS_WORD_STR(code,s,end) \
- (ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \
- m17n_iswchar(code,*(s)))
-#define IS_WORD_STR_INC(code,s,end) \
- (ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \
- (s++, m17n_iswchar(code,s[-1])))
-
-#define IS_WORD_HEAD(enc,c) (ismb(enc,c) ? 1 : IS_CODE_WORD(enc,c))
-
-#define IS_SB_WORD(code,c) (mblen(code,c) == 1 && IS_CODE_WORD(code,c))
-#define IS_MB_WORD(code,c) ismb(code,c)
-
-#define mb2wc(p,e,enc) m17n_codepoint((enc),(p),(e))
-
-#else /* REG_RUBY_M17N */
-
-#define mb2wc(p,e,code) regex_mb2wc((p),(e),(code))
-
-#define MB2WC_AVAILABLE(code) 1
-#define WC2MB_FIRST(code, wc) regex_wc2mb_first(code, wc)
-
-#define mbmaxlen_dist(code) mbmaxlen(code)
-#define mbmaxlen(code) regex_mb_max_length(code)
-#define mblen(code,c) (code)[(int )(c)]
-
-#define IS_SINGLEBYTE_CODE(code) ((code) == REGCODE_ASCII)
-#define IS_INDEPENDENT_TRAIL(code) \
- ((code) == REGCODE_ASCII || (code) == REGCODE_UTF8)
-
-#define IS_CODE_ASCII(code,c) IS_ASCII(c)
-#define IS_CODE_GRAPH(code,c) IS_GRAPH(c)
-#define IS_CODE_PRINT(code,c) IS_PRINT(c)
-#define IS_CODE_ALNUM(code,c) IS_ALNUM(c)
-#define IS_CODE_ALPHA(code,c) IS_ALPHA(c)
-#define IS_CODE_LOWER(code,c) IS_LOWER(c)
-#define IS_CODE_UPPER(code,c) IS_UPPER(c)
-#define IS_CODE_CNTRL(code,c) IS_CNTRL(c)
-#define IS_CODE_PUNCT(code,c) IS_PUNCT(c)
-#define IS_CODE_SPACE(code,c) IS_SPACE(c)
-#define IS_CODE_BLANK(code,c) IS_BLANK(c)
-#define IS_CODE_DIGIT(code,c) IS_DIGIT(c)
-#define IS_CODE_ODIGIT(code,c) IS_ODIGIT(c)
-#define IS_CODE_XDIGIT(code,c) IS_XDIGIT(c)
-
-#define IS_SB_WORD(code,c) (IS_CODE_ALNUM(code,c) || (c) == '_')
-#define IS_MB_WORD(code,c) ismb(code,c)
-
-#define IS_CODE_WORD(code,c) \
- (IS_SB_WORD(code,c) && ((c) < 0x80 || (code) == REGCODE_ASCII))
-#define ISNOT_CODE_WORD(code,c) \
- ((!IS_SB_WORD(code,c)) && !ismb(code,c))
-
-#define IS_WORD_STR(code,s,end) \
- (ismb((code),*(s)) ? (s + mblen((code),*(s)) <= (end)) : \
- IS_SB_WORD(code,*(s)))
-#define IS_WORD_STR_INC(code,s,end) \
- (ismb((code),*(s)) ? ((s) += mblen((code),*(s)), (s) <= (end)) : \
- (s++, IS_SB_WORD(code,s[-1])))
-
-#define IS_WORD_HEAD(code,c) (ismb(code,c) ? 1 : IS_SB_WORD(code,c))
-
-extern int regex_mb_max_length P_((RegCharEncoding code));
-extern WCINT regex_mb2wc P_((UChar* p, UChar* end, RegCharEncoding code));
-extern int regex_wc2mb_first P_((RegCharEncoding code, WCINT wc));
-
-#endif /* not REG_RUBY_M17N */
-
#define ANCHOR_BEGIN_BUF (1<<0)
#define ANCHOR_BEGIN_LINE (1<<1)
@@ -571,7 +439,7 @@ enum OpCode {
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
OP_EXACTMB2N, /* mb-length = 2 */
- OP_EXACTMB3N, /* mb length = 3 */
+ OP_EXACTMB3N, /* mb-length = 3 */
OP_EXACTMBN, /* other length */
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
@@ -584,9 +452,12 @@ enum OpCode {
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
- OP_ANYCHAR, /* "." */
- OP_ANYCHAR_STAR, /* ".*" */
+ OP_ANYCHAR, /* "." */
+ OP_ANYCHAR_ML, /* "." multi-line */
+ OP_ANYCHAR_STAR, /* ".*" */
+ OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT,
OP_WORD,
OP_NOT_WORD,
@@ -608,7 +479,9 @@ enum OpCode {
OP_BACKREF2,
OP_BACKREF3,
OP_BACKREFN,
+ OP_BACKREFN_IC,
OP_BACKREF_MULTI,
+ OP_BACKREF_MULTI_IC,
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
@@ -632,6 +505,8 @@ enum OpCode {
OP_REPEAT_INC_NG, /* non greedy */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
+ OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
@@ -668,9 +543,10 @@ typedef int RepeatNumType;
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
-#define SIZE_OPTION sizeof(RegOptionType)
+#define SIZE_OPTION sizeof(OnigOptionType)
+#define SIZE_CODE_POINT sizeof(OnigCodePoint)
-#ifdef UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define GET_RELADDR_INC(addr,p) do{\
addr = *((RelAddrType* )(p));\
(p) += SIZE_RELADDR;\
@@ -697,7 +573,7 @@ typedef int RepeatNumType;
} while(0)
#define GET_OPTION_INC(option,p) do{\
- option = *((RegOptionType* )(p));\
+ option = *((OnigOptionType* )(p));\
(p) += SIZE_OPTION;\
} while(0)
#else
@@ -718,8 +594,10 @@ typedef int RepeatNumType;
#define SERIALIZE_BUFSIZE SIZEOF_INT
-#endif /* UNALIGNED_WORD_ACCESS */
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+/* code point's address must be aligned address. */
+#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
@@ -760,31 +638,50 @@ typedef int RepeatNumType;
#define SIZE_OP_RETURN SIZE_OPCODE
-#ifdef REG_DEBUG
+typedef struct {
+ OnigCodePoint esc;
+ OnigCodePoint anychar;
+ OnigCodePoint anytime;
+ OnigCodePoint zero_or_one_time;
+ OnigCodePoint one_or_more_time;
+ OnigCodePoint anychar_anytime;
+} OnigMetaCharTableType;
+
+extern OnigMetaCharTableType OnigMetaCharTable;
+
+#define MC_ESC OnigMetaCharTable.esc
+#define MC_ANYCHAR OnigMetaCharTable.anychar
+#define MC_ANYTIME OnigMetaCharTable.anytime
+#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
+#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
+
+
+#ifdef ONIG_DEBUG
typedef struct {
short int opcode;
char* name;
short int arg_type;
-} RegOpInfoType;
+} OnigOpInfoType;
-extern RegOpInfoType RegOpInfo[];
+extern OnigOpInfoType OnigOpInfo[];
-extern void regex_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
-#ifdef REG_DEBUG_STATISTICS
-extern void regex_statistics_init P_((void));
-extern void regex_print_statistics P_((FILE* f));
+#ifdef ONIG_DEBUG_STATISTICS
+extern void onig_statistics_init P_((void));
+extern void onig_print_statistics P_((FILE* f));
#endif
#endif
-extern char* regex_error_code_to_format P_((int code));
-extern void regex_snprintf_with_pattern PV_((char buf[], int bufsize, RegCharEncoding enc, char* pat, char* pat_end, char *fmt, ...));
-extern UChar* regex_strdup P_((UChar* s, UChar* end));
-extern int regex_bbuf_init P_((BBuf* buf, int size));
-extern int regex_alloc_init P_((regex_t** reg, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax));
-extern int regex_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, RegErrorInfo* einfo));
-extern void regex_chain_reduce P_((regex_t* reg));
-extern int regex_is_in_wc_range P_((UChar* p, WCINT wc));
+extern char* onig_error_code_to_format P_((int code));
+extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_strdup P_((UChar* s, UChar* end));
+extern int onig_bbuf_init P_((BBuf* buf, int size));
+extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
+extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
+extern void onig_chain_reduce P_((regex_t* reg));
+extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c
index 95a55b2a06..673432c00e 100644
--- a/ext/mbstring/oniguruma/regparse.c
+++ b/ext/mbstring/oniguruma/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regparse.h"
@@ -10,126 +10,242 @@
#define WARN_BUFSIZE 256
#define SYN_POSIX_COMMON_OP \
- ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_POSIX_BRACKET | REG_SYN_OP_BACK_REF | \
- REG_SYN_OP_CC | REG_SYN_OP_0INF | REG_SYN_OP_LINE_ANCHOR | \
- REG_SYN_OP_ESC_CONTROL_CHAR )
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
#define SYN_GNU_REGEX_OP \
- ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | \
- REG_SYN_OP_POSIX_BRACKET | REG_SYN_OP_BACK_REF | \
- REG_SYN_OP_INTERVAL | REG_SYN_OP_SUBEXP | REG_SYN_OP_ALT | \
- REG_SYN_OP_0INF | REG_SYN_OP_1INF | REG_SYN_OP_01 | \
- REG_SYN_OP_ESC_BUF_ANCHOR | REG_SYN_OP_ESC_WORD | \
- REG_SYN_OP_ESC_WORD_BOUND | REG_SYN_OP_ESC_WORD_BEGIN_END | \
- REG_SYN_OP_ESC_WHITE_SPACE | REG_SYN_OP_ESC_DIGIT | \
- REG_SYN_OP_LINE_ANCHOR )
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
#define SYN_GNU_REGEX_BV \
- ( REG_SYN_CONTEXT_INDEP_ANCHORS | REG_SYN_CONTEXT_INDEP_OPS | \
- REG_SYN_CONTEXT_INVALID_OPS | REG_SYN_ALLOW_INVALID_INTERVAL | \
- REG_SYN_ESCAPE_IN_CC | REG_SYN_ALLOW_RANGE_OP_IN_CC )
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
#ifdef USE_VARIABLE_SYNTAX
-RegSyntaxType RegSyntaxPosixBasic = {
- ( SYN_POSIX_COMMON_OP | REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_INTERVAL )
+OnigSyntaxType OnigSyntaxPosixBasic = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL )
, 0
, 0
- , ( REG_OPTION_SINGLELINE | REG_OPTION_MULTILINE )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
-RegSyntaxType RegSyntaxPosixExtended = {
- ( SYN_POSIX_COMMON_OP | REG_SYN_OP_SUBEXP | REG_SYN_OP_INTERVAL |
- REG_SYN_OP_1INF | REG_SYN_OP_01 | REG_SYN_OP_ALT )
+OnigSyntaxType OnigSyntaxPosixExtended = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
+ ONIG_SYN_OP_BRACE_INTERVAL |
+ ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
- , ( REG_SYN_CONTEXT_INDEP_ANCHORS |
- REG_SYN_CONTEXT_INDEP_OPS | REG_SYN_CONTEXT_INVALID_OPS |
- REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | REG_SYN_ALLOW_RANGE_OP_IN_CC )
- , ( REG_OPTION_SINGLELINE | REG_OPTION_MULTILINE )
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
-RegSyntaxType RegSyntaxEmacs = {
- ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | REG_SYN_OP_ESC_INTERVAL |
- REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_ALT |
- REG_SYN_OP_0INF | REG_SYN_OP_1INF | REG_SYN_OP_01 |
- REG_SYN_OP_BACK_REF | REG_SYN_OP_LINE_ANCHOR |
- REG_SYN_OP_ESC_GNU_BUF_ANCHOR | REG_SYN_OP_ESC_CONTROL_CHAR )
- , 0
- , REG_SYN_ALLOW_EMPTY_RANGE_IN_CC
- , REG_OPTION_NONE
+OnigSyntaxType OnigSyntaxEmacs = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL |
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
+ ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
+ ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
+ , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+ , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ , ONIG_OPTION_NONE
};
-RegSyntaxType RegSyntaxGrep = {
- ( REG_SYN_OP_ANYCHAR | REG_SYN_OP_CC | REG_SYN_OP_POSIX_BRACKET |
- REG_SYN_OP_INTERVAL | REG_SYN_OP_ESC_SUBEXP | REG_SYN_OP_ESC_ALT |
- REG_SYN_OP_0INF | REG_SYN_OP_ESC_1INF | REG_SYN_OP_ESC_01 |
- REG_SYN_OP_LINE_ANCHOR )
+OnigSyntaxType OnigSyntaxGrep = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
+ ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
, 0
- , ( REG_SYN_ALLOW_EMPTY_RANGE_IN_CC | REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
- , REG_OPTION_NONE
+ , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
+ , ONIG_OPTION_NONE
};
-RegSyntaxType RegSyntaxGnuRegex = {
+OnigSyntaxType OnigSyntaxGnuRegex = {
SYN_GNU_REGEX_OP
, 0
, SYN_GNU_REGEX_BV
- , REG_OPTION_NONE
+ , ONIG_OPTION_NONE
};
-RegSyntaxType RegSyntaxJava = {
- (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT |
- REG_SYN_OP_ESC_CONTROL_CHAR | REG_SYN_OP_ESC_C_CONTROL |
- REG_SYN_OP_QUOTE | REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 )
- & ~REG_SYN_OP_ESC_WORD_BEGIN_END )
- , ( REG_SYN_OP2_OPTION_PERL |
- REG_SYN_OP2_POSSESSIVE_REPEAT | REG_SYN_OP2_POSSESSIVE_INTERVAL |
- REG_SYN_OP2_CCLASS_SET | REG_SYN_OP2_ESC_V_VTAB |
- REG_SYN_OP2_ESC_U_HEX4 )
- , ( SYN_GNU_REGEX_BV | REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
- , REG_OPTION_SINGLELINE
+OnigSyntaxType OnigSyntaxJava = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
+ ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
+ ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
+ , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
+ , ONIG_OPTION_SINGLELINE
};
-RegSyntaxType RegSyntaxPerl = {
- (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT |
- REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 |
- REG_SYN_OP_ESC_X_BRACE_HEX8 | REG_SYN_OP_ESC_CONTROL_CHAR |
- REG_SYN_OP_ESC_C_CONTROL | REG_SYN_OP_QUOTE )
- & ~REG_SYN_OP_ESC_WORD_BEGIN_END )
- , REG_SYN_OP2_OPTION_PERL
+OnigSyntaxType OnigSyntaxPerl = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
, SYN_GNU_REGEX_BV
- , REG_OPTION_SINGLELINE
+ , ONIG_OPTION_SINGLELINE
};
#endif /* USE_VARIABLE_SYNTAX */
-RegSyntaxType RegSyntaxRuby = {
- (( SYN_GNU_REGEX_OP | REG_SYN_OP_NON_GREEDY | REG_SYN_OP_SUBEXP_EFFECT |
- REG_SYN_OP_ESC_OCTAL3 | REG_SYN_OP_ESC_X_HEX2 |
- REG_SYN_OP_ESC_X_BRACE_HEX8 | REG_SYN_OP_ESC_CONTROL_CHAR |
- REG_SYN_OP_ESC_C_CONTROL )
- & ~REG_SYN_OP_ESC_WORD_BEGIN_END )
- , ( REG_SYN_OP2_OPTION_RUBY |
- REG_SYN_OP2_NAMED_SUBEXP | REG_SYN_OP2_SUBEXP_CALL |
- REG_SYN_OP2_POSSESSIVE_REPEAT | REG_SYN_OP2_CCLASS_SET |
- REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
- REG_SYN_OP2_ESC_M_BAR_META | REG_SYN_OP2_ESC_V_VTAB )
- , ( SYN_GNU_REGEX_BV | REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED |
- REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
- , REG_OPTION_NONE
+OnigSyntaxType OnigSyntaxRuby = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
+ , ONIG_OPTION_NONE
};
-RegSyntaxType* RegDefaultSyntax = REG_SYNTAX_RUBY;
+OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
#ifdef USE_VARIABLE_SYNTAX
extern int
-regex_set_default_syntax(RegSyntaxType* syntax)
+onig_set_default_syntax(OnigSyntaxType* syntax)
{
if (IS_NULL(syntax))
- syntax = REG_SYNTAX_RUBY;
+ syntax = ONIG_SYNTAX_RUBY;
- RegDefaultSyntax = syntax;
+ OnigDefaultSyntax = syntax;
return 0;
}
+
+extern void
+onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+{
+ *to = *from;
+}
+
+extern void
+onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+{
+ syntax->op = op;
+}
+
+extern void
+onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+{
+ syntax->op2 = op2;
+}
+
+extern void
+onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+{
+ syntax->behavior = behavior;
+}
+
+extern void
+onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+{
+ syntax->options = options;
+}
#endif
+OnigMetaCharTableType OnigMetaCharTable = {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )0 /* anychar '.' */
+ , (OnigCodePoint )0 /* anytime '*' */
+ , (OnigCodePoint )0 /* zero or one time '?' */
+ , (OnigCodePoint )0 /* one or more time '+' */
+ , (OnigCodePoint )0 /* anychar anytime */
+};
+
+#ifdef USE_VARIABLE_META_CHARS
+extern int onig_set_meta_char(unsigned int what, unsigned int c)
+{
+ switch (what) {
+ case ONIG_META_CHAR_ESCAPE:
+ OnigMetaCharTable.esc = c;
+ break;
+ case ONIG_META_CHAR_ANYCHAR:
+ OnigMetaCharTable.anychar = c;
+ break;
+ case ONIG_META_CHAR_ANYTIME:
+ OnigMetaCharTable.anytime = c;
+ break;
+ case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
+ OnigMetaCharTable.zero_or_one_time = c;
+ break;
+ case ONIG_META_CHAR_ONE_OR_MORE_TIME:
+ OnigMetaCharTable.one_or_more_time = c;
+ break;
+ case ONIG_META_CHAR_ANYCHAR_ANYTIME:
+ OnigMetaCharTable.anychar_anytime = c;
+ break;
+ default:
+ return ONIGERR_INVALID_ARGUMENT;
+ break;
+ }
+ return 0;
+}
+#endif /* USE_VARIABLE_META_CHARS */
+
+
+extern void onig_null_warn(char* s) { }
+
+#ifdef DEFAULT_WARN_FUNCTION
+static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_warn = onig_null_warn;
+#endif
+
+#ifdef DEFAULT_VERB_WARN_FUNCTION
+static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_verb_warn = onig_null_warn;
+#endif
+
+extern void onig_set_warn_func(OnigWarnFunc f)
+{
+ onig_warn = f;
+}
+
+extern void onig_set_verb_warn_func(OnigWarnFunc f)
+{
+ onig_verb_warn = f;
+}
+
static void
bbuf_free(BBuf* bbuf)
{
@@ -146,7 +262,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
BBuf *to;
*rto = to = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_VAL(to, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
r = BBUF_INIT(to, from->alloc);
if (r != 0) return r;
to->used = from->used;
@@ -154,15 +270,13 @@ bbuf_clone(BBuf** rto, BBuf* from)
return 0;
}
-#define WC2MB_MAX_BUFLEN 7
#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
#define SET_ALL_MULTI_BYTE_RANGE(pbuf) \
- add_wc_range_to_buf(pbuf, (WCINT )0x80, ~((WCINT )0),\
- (UChar )0x80, (UChar )0xff);
+ add_code_range_to_buf(pbuf, (OnigCodePoint )0x80, ~((OnigCodePoint )0))
#define ADD_ALL_MULTI_BYTE_RANGE(code, mbuf) do {\
- if (! IS_SINGLEBYTE_CODE(code)) {\
+ if (! ONIGENC_IS_SINGLEBYTE(code)) {\
r = SET_ALL_MULTI_BYTE_RANGE(&(mbuf));\
if (r) return r;\
}\
@@ -188,6 +302,7 @@ bitset_set_range(BitSetRef bs, int from, int to)
}
}
+#if 0
static void
bitset_set_all(BitSetRef bs)
{
@@ -196,6 +311,7 @@ bitset_set_all(BitSetRef bs)
bs[i] = ~((Bits )0);
}
}
+#endif
static void
bitset_invert(BitSetRef bs)
@@ -242,8 +358,8 @@ bitset_copy(BitSetRef dest, BitSetRef bs)
}
}
-static int
-k_strncmp(UChar* s1, UChar* s2, int n)
+extern int
+onig_strncmp(UChar* s1, UChar* s2, int n)
{
int x;
@@ -265,7 +381,7 @@ k_strcpy(UChar* dest, UChar* src, UChar* end)
}
extern UChar*
-regex_strdup(UChar* s, UChar* end)
+onig_strdup(UChar* s, UChar* end)
{
int len = end - s;
@@ -288,174 +404,10 @@ regex_strdup(UChar* s, UChar* end)
#define PPEEK (p < end ? *p : PEND_VALUE)
#define PEND (p < end ? 0 : 1)
-#ifdef REG_RUBY_M17N
-
-static int
-wc2mb_buf(WCINT wc, UChar **bufs, UChar **bufe, RegCharEncoding enc)
-{
- int c, len;
-
- c = m17n_firstbyte(enc, wc);
- len = mblen(enc, c);
- if (len > (*bufe - *bufs)) {
- *bufs = xmalloc(len);
- CHECK_NULL_RETURN_VAL(*bufs, REGERR_MEMORY);
- }
- m17n_mbcput(enc, wc, *bufs);
- *bufe = *bufs + len;
- return 0;
-}
-
-#else /* REG_RUBY_M17N */
-
-extern int
-regex_wc2mb_first(RegCharEncoding code, WCINT wc)
-{
- if (code == REGCODE_ASCII) {
- return (wc & 0xff);
- }
- else if (code == REGCODE_UTF8) {
- if ((wc & 0xffffff80) == 0)
- return wc;
- else {
- if ((wc & 0xfffff800) == 0)
- return ((wc>>6)& 0x1f) | 0xc0;
- else if ((wc & 0xffff0000) == 0)
- return ((wc>>12) & 0x0f) | 0xe0;
- else if ((wc & 0xffe00000) == 0)
- return ((wc>>18) & 0x07) | 0xf0;
- else if ((wc & 0xfc000000) == 0)
- return ((wc>>24) & 0x03) | 0xf8;
- else if ((wc & 0x80000000) == 0)
- return ((wc>>30) & 0x01) | 0xfc;
- else {
- return REGERR_TOO_BIG_WIDE_CHAR_VALUE;
- }
- }
- }
- else {
- int first;
-
- if ((wc & 0xff0000) != 0) {
- first = (wc >> 16) & 0xff;
- if (mblen(code, first) != 3)
- return REGERR_INVALID_WIDE_CHAR_VALUE;
- }
- else if ((wc & 0xff00) != 0) {
- first = (wc >> 8) & 0xff;
- if (mblen(code, first) != 2)
- return REGERR_INVALID_WIDE_CHAR_VALUE;
- }
- else {
- if (mblen(code, wc) != 1)
- return REGERR_INVALID_WIDE_CHAR_VALUE;
- return wc;
- }
- return first;
- }
-}
-
-static int
-wc2mb(WCINT wc, UChar buf[], RegCharEncoding code)
-{
-#define UTF8_TRAILS(wc, shift) ((((wc) >> (shift)) & 0x3f) | 0x80)
-#define UTF8_TRAIL0(wc) (((wc) & 0x3f) | 0x80)
-
- UChar *p = buf;
-
- if (code == REGCODE_UTF8) {
- if ((wc & 0xffffff80) == 0)
- *p++ = wc;
- else {
- if ((wc & 0xfffff800) == 0) {
- *p++ = ((wc>>6)& 0x1f) | 0xc0;
- }
- else if ((wc & 0xffff0000) == 0) {
- *p++ = ((wc>>12) & 0x0f) | 0xe0;
- *p++ = UTF8_TRAILS(wc, 6);
- }
- else if ((wc & 0xffe00000) == 0) {
- *p++ = ((wc>>18) & 0x07) | 0xf0;
- *p++ = UTF8_TRAILS(wc, 12);
- *p++ = UTF8_TRAILS(wc, 6);
- }
- else if ((wc & 0xfc000000) == 0) {
- *p++ = ((wc>>24) & 0x03) | 0xf8;
- *p++ = UTF8_TRAILS(wc, 18);
- *p++ = UTF8_TRAILS(wc, 12);
- *p++ = UTF8_TRAILS(wc, 6);
- }
- else if ((wc & 0x80000000) == 0) {
- *p++ = ((wc>>30) & 0x01) | 0xfc;
- *p++ = UTF8_TRAILS(wc, 24);
- *p++ = UTF8_TRAILS(wc, 18);
- *p++ = UTF8_TRAILS(wc, 12);
- *p++ = UTF8_TRAILS(wc, 6);
- }
- else {
- return REGERR_TOO_BIG_WIDE_CHAR_VALUE;
- }
- *p++ = UTF8_TRAIL0(wc);
- }
- }
- else {
- if ((wc & 0xff0000) != 0) *p++ = ((wc >> 16) & 0xff);
- if ((wc & 0xff00) != 0) *p++ = ((wc >> 8) & 0xff);
- *p++ = (wc & 0xff);
-
- if (mblen(code, buf[0]) != (p - buf))
- return REGERR_INVALID_WIDE_CHAR_VALUE;
- }
-
- return p - buf;
-}
-
-static int
-wc2mb_buf(WCINT wc, UChar **bufs, UChar **bufe, RegCharEncoding code)
-{
- int r;
- r = wc2mb(wc, *bufs, code);
- if (r < 0) return r;
-
- *bufe = (*bufs) + r;
- return 0;
-}
-#endif /* not REG_RUBY_M17N */
-
-/* used as function pointer value */
-static int
-is_code_ascii(RegCharEncoding code, UChar c)
-{
- return (c < 128 ? 1 : 0);
-}
-
-static int
-is_code_graph(RegCharEncoding code, UChar c) { return IS_CODE_GRAPH(code, c); }
-static int
-is_code_print(RegCharEncoding code, UChar c) { return IS_CODE_PRINT(code, c); }
-static int
-is_code_alnum(RegCharEncoding code, UChar c) { return IS_CODE_ALNUM(code, c); }
-static int
-is_code_alpha(RegCharEncoding code, UChar c) { return IS_CODE_ALPHA(code, c); }
-static int
-is_code_lower(RegCharEncoding code, UChar c) { return IS_CODE_LOWER(code, c); }
-static int
-is_code_upper(RegCharEncoding code, UChar c) { return IS_CODE_UPPER(code, c); }
-static int
-is_code_cntrl(RegCharEncoding code, UChar c) { return IS_CODE_CNTRL(code, c); }
-static int
-is_code_punct(RegCharEncoding code, UChar c) { return IS_CODE_PUNCT(code, c); }
-static int
-is_code_space(RegCharEncoding code, UChar c) { return IS_CODE_SPACE(code, c); }
-static int
-is_code_blank(RegCharEncoding code, UChar c) { return IS_CODE_BLANK(code, c); }
-static int
-is_code_digit(RegCharEncoding code, UChar c) { return IS_CODE_DIGIT(code, c); }
-static int
-is_code_xdigit(RegCharEncoding code, UChar c) { return IS_CODE_XDIGIT(code, c); }
static UChar*
-k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end, int capa)
+k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end,
+ int capa)
{
UChar* r;
@@ -483,7 +435,7 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
return r;
}
-#ifdef USE_NAMED_SUBEXP
+#ifdef USE_NAMED_GROUP
#define INIT_NAME_BACKREFS_ALLOC_NUM 8
@@ -506,7 +458,7 @@ typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
#define NAMEBUF_SIZE 24
#define NAMEBUF_SIZE_1 25
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
static int
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
{
@@ -529,7 +481,7 @@ i_print_name_entry(UChar* key, NameEntry* e, void* arg)
}
extern int
-regex_print_names(FILE* fp, regex_t* reg)
+onig_print_names(FILE* fp, regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
@@ -562,7 +514,7 @@ names_clear(regex_t* reg)
}
extern int
-regex_names_free(regex_t* reg)
+onig_names_free(regex_t* reg)
{
int r;
NameTable* t;
@@ -599,7 +551,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
key = namebuf;
}
else {
- key = regex_strdup(name, name_end);
+ key = onig_strdup(name, name_end);
if (IS_NULL(key)) return (NameEntry* )NULL;
}
}
@@ -611,7 +563,8 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
}
typedef struct {
- int (*func)(UChar*,int,int*,void*);
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*);
+ regex_t* reg;
void* arg;
int ret;
} INamesArg;
@@ -619,8 +572,9 @@ typedef struct {
static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
- int r = (*(arg->func))(e->name, e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg->arg);
+ int r = (*(arg->func))(e->name, e->name + strlen(e->name), e->back_num,
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
+ arg->reg, arg->arg);
if (r != 0) {
arg->ret = r;
return ST_STOP;
@@ -629,7 +583,9 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg)
}
extern int
-regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)
+onig_foreach_name(regex_t* reg,
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+ void* arg)
{
INamesArg narg;
NameTable* t = (NameTable* )reg->name_table;
@@ -637,12 +593,24 @@ regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)
narg.ret = 0;
if (IS_NOT_NULL(t)) {
narg.func = func;
+ narg.reg = reg;
narg.arg = arg;
st_foreach(t, i_names, (HashDataType )&narg);
}
return narg.ret;
}
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ NameTable* t = (NameTable* )reg->name_table;
+
+ if (IS_NOT_NULL(t))
+ return t->num_entries;
+ else
+ return 0;
+}
+
#else /* USE_ST_HASH_TABLE */
#define INIT_NAMES_ALLOC_NUM 8
@@ -654,9 +622,9 @@ typedef struct {
} NameTable;
-#ifdef REG_DEBUG
+#ifdef ONIG_DEBUG
extern int
-regex_print_names(FILE* fp, regex_t* reg)
+onig_print_names(FILE* fp, regex_t* reg)
{
int i, j;
NameEntry* e;
@@ -707,13 +675,17 @@ names_clear(regex_t* reg)
e->back_refs = (int* )NULL;
}
}
+ if (IS_NOT_NULL(t->e)) {
+ xfree(t->e);
+ t->e = NULL;
+ }
t->num = 0;
}
return 0;
}
extern int
-regex_names_free(regex_t* reg)
+onig_names_free(regex_t* reg)
{
int r;
NameTable* t;
@@ -738,7 +710,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
len = name_end - name;
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
- if (len == e->name_len && k_strncmp(name, e->name, len) == 0)
+ if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
return e;
}
}
@@ -746,7 +718,9 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
}
extern int
-regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)
+onig_foreach_name(regex_t* reg,
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+ void* arg)
{
int i, r;
NameEntry* e;
@@ -755,25 +729,37 @@ regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)
if (IS_NOT_NULL(t)) {
for (i = 0; i < t->num; i++) {
e = &(t->e[i]);
- r = (*func)(e->name, e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), arg);
+ r = (*func)(e->name, e->name + e->name_len, e->back_num,
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
+ reg, arg);
if (r != 0) return r;
}
}
return 0;
}
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ NameTable* t = (NameTable* )reg->name_table;
+
+ if (IS_NOT_NULL(t))
+ return t->num;
+ else
+ return 0;
+}
+
#endif /* else USE_ST_HASH_TABLE */
static int
-name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
+name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
{
int alloc;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
if (name_end - name <= 0)
- return REGERR_INVALID_SUBEXP_NAME;
+ return ONIGERR_EMPTY_GROUP_NAME;
e = name_find(reg, name, name_end);
if (IS_NULL(e)) {
@@ -782,10 +768,10 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
reg->name_table = t = st_init_strtable();
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
- CHECK_NULL_RETURN_VAL(e, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
- e->name = regex_strdup(name, name_end);
- if (IS_NULL(e->name)) return REGERR_MEMORY;
+ e->name = onig_strdup(name, name_end);
+ if (IS_NULL(e->name)) return ONIGERR_MEMORY;
st_insert(t, (HashDataType )e->name, (HashDataType )e);
e->name_len = name_end - name;
@@ -798,7 +784,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
if (IS_NULL(t)) {
alloc = INIT_NAMES_ALLOC_NUM;
t = (NameTable* )xmalloc(sizeof(NameTable));
- CHECK_NULL_RETURN_VAL(t, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
t->e = NULL;
t->alloc = 0;
t->num = 0;
@@ -806,7 +792,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
if (IS_NULL(t->e)) {
xfree(t);
- return REGERR_MEMORY;
+ return ONIGERR_MEMORY;
}
t->alloc = alloc;
reg->name_table = t;
@@ -817,7 +803,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
alloc = t->alloc * 2;
t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
- CHECK_NULL_RETURN_VAL(t->e, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY);
t->alloc = alloc;
clear:
@@ -831,44 +817,53 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref)
}
e = &(t->e[t->num]);
t->num++;
- e->name = regex_strdup(name, name_end);
+ e->name = onig_strdup(name, name_end);
e->name_len = name_end - name;
#endif
}
+ if (e->back_num >= 1 &&
+ ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+ name, name_end);
+ return ONIGERR_MULTIPLEX_DEFINED_NAME;
+ }
+
e->back_num++;
if (e->back_num == 1) {
e->back_ref1 = backref;
}
- else if (e->back_num == 2) {
- alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
- e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
- CHECK_NULL_RETURN_VAL(e->back_refs, REGERR_MEMORY);
- e->back_alloc = alloc;
- e->back_refs[0] = e->back_ref1;
- e->back_refs[1] = backref;
- }
else {
- if (e->back_num > e->back_alloc) {
- alloc = e->back_alloc * 2;
- e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
- CHECK_NULL_RETURN_VAL(e->back_refs, REGERR_MEMORY);
+ if (e->back_num == 2) {
+ alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
+ e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
+ CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
e->back_alloc = alloc;
+ e->back_refs[0] = e->back_ref1;
+ e->back_refs[1] = backref;
+ }
+ else {
+ if (e->back_num > e->back_alloc) {
+ alloc = e->back_alloc * 2;
+ e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
+ CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
+ e->back_alloc = alloc;
+ }
+ e->back_refs[e->back_num - 1] = backref;
}
- e->back_refs[e->back_num - 1] = backref;
}
return 0;
}
extern int
-regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
+onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
int** nums)
{
NameEntry* e;
e = name_find(reg, name, name_end);
- if (IS_NULL(e)) return REGERR_UNDEFINED_NAME_REFERENCE;
+ if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
switch (e->back_num) {
case 0:
@@ -883,21 +878,60 @@ regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
return e->back_num;
}
-#else
+extern int
+onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
+ OnigRegion *region)
+{
+ int i, n, *nums;
+
+ n = onig_name_to_group_numbers(reg, name, name_end, &nums);
+ if (n < 0)
+ return n;
+ else if (n == 0)
+ return ONIGERR_PARSER_BUG;
+ else if (n == 1)
+ return nums[0];
+ else {
+ if (IS_NOT_NULL(region)) {
+ for (i = n - 1; i >= 0; i--) {
+ if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
+ return nums[i];
+ }
+ }
+ return nums[n - 1];
+ }
+}
+
+#else /* USE_NAMED_GROUP */
extern int
-regex_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
+onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
int** nums)
{
- return REG_NO_SUPPORT_CONFIG;
+ return ONIG_NO_SUPPORT_CONFIG;
}
extern int
-regex_foreach_name(regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)
+onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
+ OnigRegion* region)
{
- return REG_NO_SUPPORT_CONFIG;
+ return ONIG_NO_SUPPORT_CONFIG;
}
-#endif
+
+extern int
+onig_foreach_name(regex_t* reg,
+ int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ return 0;
+}
+#endif /* else USE_NAMED_GROUP */
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
@@ -907,12 +941,17 @@ scan_env_clear(ScanEnv* env)
{
int i;
- BIT_STATUS_CLEAR(env->backtrack_mem);
+ BIT_STATUS_CLEAR(env->capture_history);
+ BIT_STATUS_CLEAR(env->bt_mem_start);
+ BIT_STATUS_CLEAR(env->bt_mem_end);
BIT_STATUS_CLEAR(env->backrefed_mem);
env->error = (UChar* )NULL;
env->error_end = (UChar* )NULL;
env->num_call = 0;
env->num_mem = 0;
+#ifdef USE_NAMED_GROUP
+ env->num_named = 0;
+#endif
env->mem_alloc = 0;
env->mem_nodes_dynamic = (Node** )NULL;
@@ -939,7 +978,7 @@ scan_env_add_mem_entry(ScanEnv* env)
alloc = env->mem_alloc * 2;
p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
}
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
for (i = env->num_mem + 1; i < alloc; i++)
p[i] = NULL_NODE;
@@ -959,7 +998,7 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
if (env->num_mem >= num)
SCANENV_MEM_NODES(env)[num] = node;
else
- return REGERR_INVALID_BACKREF;
+ return ONIGERR_PARSER_BUG;
return 0;
}
@@ -973,7 +1012,7 @@ static FreeNode* FreeNodeList = (FreeNode* )NULL;
#endif
extern void
-regex_node_free(Node* node)
+onig_node_free(Node* node)
{
if (IS_NULL(node)) return ;
@@ -986,8 +1025,8 @@ regex_node_free(Node* node)
case N_LIST:
case N_ALT:
- regex_node_free(NCONS(node).left);
- regex_node_free(NCONS(node).right);
+ onig_node_free(NCONS(node).left);
+ onig_node_free(NCONS(node).right);
break;
case N_CCLASS:
@@ -997,12 +1036,12 @@ regex_node_free(Node* node)
case N_QUALIFIER:
if (NQUALIFIER(node).target)
- regex_node_free(NQUALIFIER(node).target);
+ onig_node_free(NQUALIFIER(node).target);
break;
case N_EFFECT:
if (NEFFECT(node).target)
- regex_node_free(NEFFECT(node).target);
+ onig_node_free(NEFFECT(node).target);
break;
case N_BACKREF:
@@ -1012,7 +1051,7 @@ regex_node_free(Node* node)
case N_ANCHOR:
if (NANCHOR(node).target)
- regex_node_free(NANCHOR(node).target);
+ onig_node_free(NANCHOR(node).target);
break;
}
@@ -1031,7 +1070,7 @@ regex_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
extern int
-regex_free_node_list()
+onig_free_node_list()
{
FreeNode* n;
@@ -1125,7 +1164,7 @@ node_new_alt(Node* left, Node* right)
}
extern Node*
-regex_node_new_anchor(int type)
+onig_node_new_anchor(int type)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1137,7 +1176,7 @@ regex_node_new_anchor(int type)
}
static Node*
-node_new_backref(int back_num, int* backrefs, ScanEnv* env)
+node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
{
int i;
Node* node = node_new();
@@ -1147,6 +1186,8 @@ node_new_backref(int back_num, int* backrefs, ScanEnv* env)
NBACKREF(node).state = 0;
NBACKREF(node).back_num = back_num;
NBACKREF(node).back_dynamic = (int* )NULL;
+ if (by_name != 0)
+ NBACKREF(node).state |= NST_NAME_REF;
for (i = 0; i < back_num; i++) {
if (backrefs[i] <= env->num_mem &&
@@ -1163,7 +1204,7 @@ node_new_backref(int back_num, int* backrefs, ScanEnv* env)
else {
int* p = (int* )xmalloc(sizeof(int) * back_num);
if (IS_NULL(p)) {
- regex_node_free(node);
+ onig_node_free(node);
return NULL;
}
NBACKREF(node).back_dynamic = p;
@@ -1200,11 +1241,11 @@ node_new_qualifier(int lower, int upper, int by_number)
NQUALIFIER(node).lower = lower;
NQUALIFIER(node).upper = upper;
NQUALIFIER(node).greedy = 1;
- NQUALIFIER(node).by_number = by_number;
- NQUALIFIER(node).target_may_empty = 0;
- NQUALIFIER(node).head_exact = NULL_NODE;
- NQUALIFIER(node).next_head_exact = NULL_NODE;
- NQUALIFIER(node).is_refered = 0;
+ NQUALIFIER(node).by_number = by_number;
+ NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+ NQUALIFIER(node).head_exact = NULL_NODE;
+ NQUALIFIER(node).next_head_exact = NULL_NODE;
+ NQUALIFIER(node).is_refered = 0;
return node;
}
@@ -1225,13 +1266,27 @@ node_new_effect(int type)
}
extern Node*
-regex_node_new_effect(int type)
+onig_node_new_effect(int type)
{
return node_new_effect(type);
}
static Node*
-node_new_option(RegOptionType option)
+node_new_effect_memory(OnigOptionType option, int is_named)
+{
+ Node* node = node_new_effect(EFFECT_MEMORY);
+ CHECK_NULL_RETURN(node);
+ if (is_named != 0)
+ SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
+
+#ifdef USE_SUBEXP_CALL
+ NEFFECT(node).option = option;
+#endif
+ return node;
+}
+
+static Node*
+node_new_option(OnigOptionType option)
{
Node* node = node_new_effect(EFFECT_OPTION);
CHECK_NULL_RETURN(node);
@@ -1240,7 +1295,7 @@ node_new_option(RegOptionType option)
}
extern int
-regex_node_str_cat(Node* node, UChar* s, UChar* end)
+onig_node_str_cat(Node* node, UChar* s, UChar* end)
{
int addlen = end - s;
@@ -1261,7 +1316,7 @@ regex_node_str_cat(Node* node, UChar* s, UChar* end)
else
p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
- CHECK_NULL_RETURN_VAL(p, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
NSTRING(node).s = p;
NSTRING(node).capa = capa;
}
@@ -1281,11 +1336,11 @@ node_str_cat_char(Node* node, UChar c)
UChar s[1];
s[0] = c;
- return regex_node_str_cat(node, s, s + 1);
+ return onig_node_str_cat(node, s, s + 1);
}
extern void
-regex_node_conv_to_str_node(Node* node, int flag)
+onig_node_conv_to_str_node(Node* node, int flag)
{
node->type = N_STRING;
@@ -1306,8 +1361,8 @@ node_new_str(UChar* s, UChar* end)
NSTRING(node).flag = 0;
NSTRING(node).s = NSTRING(node).buf;
NSTRING(node).end = NSTRING(node).buf;
- if (regex_node_str_cat(node, s, end)) {
- regex_node_free(node);
+ if (onig_node_str_cat(node, s, end)) {
+ onig_node_free(node);
return NULL;
}
return node;
@@ -1346,13 +1401,13 @@ node_new_str_raw_char(UChar c)
}
static Node*
-str_node_split_last_char(StrNode* sn, RegCharEncoding enc)
+str_node_split_last_char(StrNode* sn, OnigEncoding enc)
{
UChar *p;
Node* n = NULL_NODE;
if (sn->end > sn->s) {
- p = regex_get_prev_char_head(enc, sn->s, sn->end);
+ p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
if (p && p > sn->s) { /* can be splitted. */
n = node_new_str(p, sn->end);
if ((sn->flag & NSTR_RAW) != 0)
@@ -1364,16 +1419,16 @@ str_node_split_last_char(StrNode* sn, RegCharEncoding enc)
}
static int
-str_node_can_be_split(StrNode* sn, RegCharEncoding enc)
+str_node_can_be_split(StrNode* sn, OnigEncoding enc)
{
if (sn->end > sn->s) {
- return ((mblen(enc, *(sn->s)) < sn->end - sn->s) ? 1 : 0);
+ return ((enc_len(enc, *(sn->s)) < sn->end - sn->s) ? 1 : 0);
}
return 0;
}
extern int
-regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc)
+onig_scan_unsigned_number(UChar** src, UChar* end, OnigEncoding enc)
{
unsigned int num, val;
int c;
@@ -1382,7 +1437,7 @@ regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc)
num = 0;
while (!PEND) {
PFETCH(c);
- if (IS_CODE_DIGIT(enc, c)) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
val = (unsigned int )DIGITVAL(c);
if ((INT_MAX_LIMIT - val) / 10UL < num)
return -1; /* overflow */
@@ -1400,7 +1455,7 @@ regex_scan_unsigned_number(UChar** src, UChar* end, RegCharEncoding enc)
static int
scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
- RegCharEncoding enc)
+ OnigEncoding enc)
{
int c;
unsigned int num, val;
@@ -1409,12 +1464,12 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
num = 0;
while (!PEND && maxlen-- != 0) {
PFETCH(c);
- if (IS_CODE_XDIGIT(enc, c)) {
- val = (unsigned int )XDIGITVAL(c);
+ if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
+ val = (unsigned int )XDIGITVAL(enc,c);
if ((INT_MAX_LIMIT - val) / 16UL < num)
return -1; /* overflow */
- num = (num << 4) + XDIGITVAL(c);
+ num = (num << 4) + XDIGITVAL(enc,c);
}
else {
PUNFETCH;
@@ -1427,7 +1482,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
static int
scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
- RegCharEncoding enc)
+ OnigEncoding enc)
{
int c;
unsigned int num, val;
@@ -1436,7 +1491,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
num = 0;
while (!PEND && maxlen-- != 0) {
PFETCH(c);
- if (IS_CODE_ODIGIT(enc, c)) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
val = ODIGITVAL(c);
if ((INT_MAX_LIMIT - val) / 8UL < num)
return -1; /* overflow */
@@ -1453,38 +1508,37 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
}
-#define BBUF_WRITE_WCINT(bbuf,pos,wc) \
- BBUF_WRITE(bbuf, pos, &(wc), SIZE_WCINT)
+#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
+ BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
/* data format:
- [multi-byte-head-BitSet][n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
- (all data size is WCINT)
+ [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
+ (all data size is OnigCodePoint)
*/
static int
-new_wc_range(BBuf** pbuf)
+new_code_range(BBuf** pbuf)
{
-#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_WCINT * 5)
+#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
int r;
- WCINT n;
+ OnigCodePoint n;
BBuf* bbuf;
bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_VAL(*pbuf, REGERR_MEMORY);
- r = BBUF_INIT(*pbuf, SIZE_BITSET + INIT_MULTI_BYTE_RANGE_SIZE);
+ CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
+ r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
if (r) return r;
n = 0;
- BBUF_WRITE_WCINT(bbuf, SIZE_BITSET, n);
- BITSET_CLEAR((BitSetRef )bbuf->p);
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);
return 0;
}
static int
-add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto)
+add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
{
int r, inc_n, pos;
int low, high, bound, x;
- WCINT n, *data;
+ OnigCodePoint n, *data;
BBuf* bbuf;
if (from > to) {
@@ -1492,16 +1546,16 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto)
}
if (IS_NULL(*pbuf)) {
- r = new_wc_range(pbuf);
+ r = new_code_range(pbuf);
if (r) return r;
bbuf = *pbuf;
n = 0;
}
else {
bbuf = *pbuf;
- GET_WCINT(n, bbuf->p + SIZE_BITSET);
+ GET_CODE_POINT(n, bbuf->p);
}
- data = (WCINT* )(bbuf->p + SIZE_BITSET);
+ data = (OnigCodePoint* )(bbuf->p);
data++;
for (low = 0, bound = n; low < bound; ) {
@@ -1521,8 +1575,8 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto)
}
inc_n = low + 1 - high;
- if (n + inc_n > REG_MAX_MULTI_BYTE_RANGES_NUM)
- return REGERR_TOO_MANY_MULTI_BYTE_RANGES;
+ if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
+ return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
if (inc_n != 1) {
if (from > data[low*2])
@@ -1532,9 +1586,9 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto)
}
if (inc_n != 0 && high < n) {
- int from_pos = SIZE_BITSET + SIZE_WCINT * (1 + high * 2);
- int to_pos = SIZE_BITSET + SIZE_WCINT * (1 + (low + 1) * 2);
- int size = (n - high) * 2 * SIZE_WCINT;
+ int from_pos = SIZE_CODE_POINT * (1 + high * 2);
+ int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
+ int size = (n - high) * 2 * SIZE_CODE_POINT;
if (inc_n > 0) {
BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
@@ -1544,52 +1598,34 @@ add_wc_range_to_buf(BBuf** pbuf, WCINT from, WCINT to, UChar cfrom, UChar cto)
}
}
- pos = SIZE_BITSET + SIZE_WCINT * (1 + low * 2);
- BBUF_ENSURE_SIZE(bbuf, pos + SIZE_WCINT * 2);
- BBUF_WRITE_WCINT(bbuf, pos, from);
- BBUF_WRITE_WCINT(bbuf, pos + SIZE_WCINT, to);
+ pos = SIZE_CODE_POINT * (1 + low * 2);
+ BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+ BBUF_WRITE_CODE_POINT(bbuf, pos, from);
+ BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
n += inc_n;
- BBUF_WRITE_WCINT(bbuf, SIZE_BITSET, n);
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);
- if (inc_n > 0) {
- int i;
- UChar tmp;
-
- if (cfrom > cto) {
- tmp = cfrom; cfrom = cto; cto = tmp;
- }
-
- for (i = cfrom; i <= cto; i++) {
- BITSET_SET_BIT((BitSetRef)bbuf->p, i);
- }
- }
return 0;
}
static int
-add_wc_range(BBuf** pbuf, ScanEnv* env, WCINT from, WCINT to)
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
{
- int cfrom, cto;
-
if (from > to) {
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
return 0;
else
- return REGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
- cfrom = WC2MB_FIRST(env->enc, from);
- if (cfrom < 0) return cfrom;
- cto = WC2MB_FIRST(env->enc, to);
- if (cto < 0) return cto;
- return add_wc_range_to_buf(pbuf, from, to, (UChar )cfrom, (UChar )cto);
+ return add_code_range_to_buf(pbuf, from, to);
}
static int
-not_wc_range_buf(BBuf* bbuf, BBuf** pbuf)
+not_code_range_buf(BBuf* bbuf, BBuf** pbuf)
{
int r, i, n;
- WCINT pre, from, to, *data;
+ OnigCodePoint pre, from, to, *data;
*pbuf = (BBuf* )NULL;
if (IS_NULL(bbuf)) {
@@ -1597,8 +1633,8 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf)
return SET_ALL_MULTI_BYTE_RANGE(pbuf);
}
- data = (WCINT* )(bbuf->p + SIZE_BITSET);
- GET_WCINT(n, data);
+ data = (OnigCodePoint* )(bbuf->p);
+ GET_CODE_POINT(n, data);
data++;
if (n <= 0) goto set_all;
@@ -1608,14 +1644,14 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf)
from = data[i*2];
to = data[i*2+1];
if (pre <= from - 1) {
- r = add_wc_range_to_buf(pbuf, pre, from - 1, (UChar )0, (UChar )0);
+ r = add_code_range_to_buf(pbuf, pre, from - 1);
if (r != 0) return r;
}
- if (to == ~((WCINT )0)) break;
+ if (to == ~((OnigCodePoint )0)) break;
pre = to + 1;
}
- if (to < ~((WCINT )0)) {
- r = add_wc_range_to_buf(pbuf, to + 1, ~((WCINT )0), (UChar )0, (UChar )0);
+ if (to < ~((OnigCodePoint )0)) {
+ r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
}
return r;
}
@@ -1628,11 +1664,11 @@ not_wc_range_buf(BBuf* bbuf, BBuf** pbuf)
} while (0)
static int
-or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
{
int i, r;
- WCINT n1, *data1;
- WCINT from, to;
+ OnigCodePoint n1, *data1;
+ OnigCodePoint from, to;
*pbuf = (BBuf* )NULL;
if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
@@ -1654,7 +1690,7 @@ or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
return bbuf_clone(pbuf, bbuf2);
}
else {
- return not_wc_range_buf(bbuf2, pbuf);
+ return not_code_range_buf(bbuf2, pbuf);
}
}
}
@@ -1662,32 +1698,33 @@ or_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
if (not1 != 0)
SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
- data1 = (WCINT* )(bbuf1->p + SIZE_BITSET);
- GET_WCINT(n1, data1);
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ GET_CODE_POINT(n1, data1);
data1++;
if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
r = bbuf_clone(pbuf, bbuf2);
}
else if (not1 == 0) { /* 1 OR (not 2) */
- r = not_wc_range_buf(bbuf2, pbuf);
+ r = not_code_range_buf(bbuf2, pbuf);
}
if (r != 0) return r;
for (i = 0; i < n1; i++) {
from = data1[i*2];
to = data1[i*2+1];
- r = add_wc_range_to_buf(pbuf, from, to, (UChar )0, (UChar )0);
+ r = add_code_range_to_buf(pbuf, from, to);
if (r != 0) return r;
}
return 0;
}
static int
-and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n)
+and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
+ OnigCodePoint* data, int n)
{
int i, r;
- WCINT from2, to2;
+ OnigCodePoint from2, to2;
for (i = 0; i < n; i++) {
from2 = data[i*2];
@@ -1701,7 +1738,7 @@ and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n)
else if (from2 <= to1) {
if (to2 < to1) {
if (from1 <= from2 - 1) {
- r = add_wc_range_to_buf(pbuf, from1, from2-1, (UChar )0, (UChar )0);
+ r = add_code_range_to_buf(pbuf, from1, from2-1);
if (r != 0) return r;
}
from1 = to2 + 1;
@@ -1716,18 +1753,18 @@ and_wc_range1(BBuf** pbuf, WCINT from1, WCINT to1, WCINT* data, int n)
if (from1 > to1) break;
}
if (from1 <= to1) {
- r = add_wc_range_to_buf(pbuf, from1, to1, (UChar )0, (UChar )0);
+ r = add_code_range_to_buf(pbuf, from1, to1);
if (r != 0) return r;
}
return 0;
}
static int
-and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
{
int i, j, r;
- WCINT n1, n2, *data1, *data2;
- WCINT from, to, from1, to1, from2, to2;
+ OnigCodePoint n1, n2, *data1, *data2;
+ OnigCodePoint from, to, from1, to1, from2, to2;
*pbuf = (BBuf* )NULL;
if (IS_NULL(bbuf1)) {
@@ -1744,10 +1781,10 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
if (not1 != 0)
SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
- data1 = (WCINT* )(bbuf1->p + SIZE_BITSET);
- data2 = (WCINT* )(bbuf2->p + SIZE_BITSET);
- GET_WCINT(n1, data1);
- GET_WCINT(n2, data2);
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ data2 = (OnigCodePoint* )(bbuf2->p);
+ GET_CODE_POINT(n1, data1);
+ GET_CODE_POINT(n2, data2);
data1++;
data2++;
@@ -1762,7 +1799,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
if (to2 < from1) continue;
from = MAX(from1, from2);
to = MIN(to1, to2);
- r = add_wc_range_to_buf(pbuf, from, to, (UChar )0, (UChar )0);
+ r = add_code_range_to_buf(pbuf, from, to);
if (r != 0) return r;
}
}
@@ -1771,7 +1808,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
for (i = 0; i < n1; i++) {
from1 = data1[i*2];
to1 = data1[i*2+1];
- r = and_wc_range1(pbuf, from1, to1, data2, n2);
+ r = and_code_range1(pbuf, from1, to1, data2, n2);
if (r != 0) return r;
}
}
@@ -1780,7 +1817,7 @@ and_wc_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
}
static int
-and_cclass(CClassNode* dest, CClassNode* cc)
+and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
{
int r, not1, not2;
BBuf *buf1, *buf2, *pbuf;
@@ -1811,34 +1848,34 @@ and_cclass(CClassNode* dest, CClassNode* cc)
bitset_invert(dest->bs);
}
- if (not1 != 0 && not2 != 0) {
- r = or_wc_range_buf(buf1, 0, buf2, 0, &pbuf);
- }
- else {
- r = and_wc_range_buf(buf1, not1, buf2, not2, &pbuf);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf;
- r = not_wc_range_buf(pbuf, &tbuf);
- if (r != 0) {
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ r = or_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+ }
+ else {
+ r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+ if (r == 0 && not1 != 0) {
+ BBuf *tbuf;
+ r = not_code_range_buf(pbuf, &tbuf);
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
bbuf_free(pbuf);
- return r;
+ pbuf = tbuf;
}
- bbuf_free(pbuf);
- pbuf = tbuf;
}
- }
- if (r != 0) return r;
+ if (r != 0) return r;
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- if (IS_NOT_NULL(pbuf)) {
- bitset_set_all((BitSetRef )pbuf->p); /* Sorry, but I'm tired. */
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
}
- return r;
+ return 0;
}
static int
-or_cclass(CClassNode* dest, CClassNode* cc)
+or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
{
int r, not1, not2;
BBuf *buf1, *buf2, *pbuf;
@@ -1869,36 +1906,37 @@ or_cclass(CClassNode* dest, CClassNode* cc)
bitset_invert(dest->bs);
}
- if (not1 != 0 && not2 != 0) {
- r = and_wc_range_buf(buf1, 0, buf2, 0, &pbuf);
- }
- else {
- r = or_wc_range_buf(buf1, not1, buf2, not2, &pbuf);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf;
- r = not_wc_range_buf(pbuf, &tbuf);
- if (r != 0) {
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+ }
+ else {
+ r = or_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+ if (r == 0 && not1 != 0) {
+ BBuf *tbuf;
+ r = not_code_range_buf(pbuf, &tbuf);
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
bbuf_free(pbuf);
- return r;
+ pbuf = tbuf;
}
- bbuf_free(pbuf);
- pbuf = tbuf;
}
- }
- if (r != 0) return r;
+ if (r != 0) return r;
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- if (IS_NOT_NULL(pbuf)) {
- bitset_set_all((BitSetRef )pbuf->p); /* Sorry, but I'm tired. */
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
}
- return r;
+ else
+ return 0;
}
static int
conv_backslash_value(int c, ScanEnv* env)
{
- if (IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_CONTROL_CHAR)) {
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
switch (c) {
case 'n': return '\n';
case 't': return '\t';
@@ -1908,7 +1946,7 @@ conv_backslash_value(int c, ScanEnv* env)
case 'b': return '\010';
case 'e': return '\033';
case 'v':
- if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_V_VTAB))
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
return '\v';
break;
@@ -1976,8 +2014,8 @@ popular_qualifier_num(QualifierNode* qf)
return -1;
}
-static void
-reduce_nested_qualifier(Node* pnode, Node* cnode)
+extern void
+onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
{
#define NQ_ASIS 0 /* as is */
#define NQ_DEL 1 /* delete parent */
@@ -2039,7 +2077,7 @@ reduce_nested_qualifier(Node* pnode, Node* cnode)
}
c->target = NULL_NODE;
- regex_node_free(cnode);
+ onig_node_free(cnode);
}
@@ -2047,7 +2085,7 @@ enum TokenSyms {
TK_EOT = 0, /* end of token */
TK_BYTE = 1,
TK_RAW_BYTE = 2,
- TK_WC,
+ TK_CODE_POINT,
TK_ANYCHAR,
TK_CHAR_TYPE,
TK_BACKREF,
@@ -2055,11 +2093,13 @@ enum TokenSyms {
TK_ANCHOR,
TK_OP_REPEAT,
TK_INTERVAL,
+ TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
TK_ALT,
TK_SUBEXP_OPEN,
TK_SUBEXP_CLOSE,
TK_CC_OPEN,
TK_QUOTE_OPEN,
+ TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
/* in cc */
TK_CC_CLOSE,
TK_CC_RANGE,
@@ -2075,7 +2115,7 @@ typedef struct {
UChar* backp;
union {
int c;
- WCINT wc;
+ OnigCodePoint code;
int anchor;
int subtype;
struct {
@@ -2088,71 +2128,90 @@ typedef struct {
int num;
int ref1;
int* refs;
+ int by_name;
} backref;
struct {
UChar* name;
UChar* name_end;
} call;
+ struct {
+ int not;
+ } prop;
} u;
-} RegToken;
+} OnigToken;
static int
-fetch_range_qualifier(UChar** src, UChar* end, RegToken* tok, ScanEnv* env)
+fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
{
- int low, up, syn_allow;
+ int low, up, syn_allow, non_low = 0;
int c;
UChar* p = *src;
- syn_allow = IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_INVALID_INTERVAL);
+ syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
if (PEND) {
if (syn_allow)
return 1; /* "....{" : OK! */
else
- return REGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
+ return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
}
if (! syn_allow) {
c = PPEEK;
if (c == ')' || c == '(' || c == '|') {
- return REGERR_END_PATTERN_AT_LEFT_BRACE;
+ return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
}
}
- low = regex_scan_unsigned_number(&p, end, env->enc);
- if (low < 0) return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- if (low > REG_MAX_REPEAT_NUM)
- return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ low = onig_scan_unsigned_number(&p, end, env->enc);
+ if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ if (low > ONIG_MAX_REPEAT_NUM)
+ return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- if (p == *src) goto invalid; /* can't read low */
+ if (p == *src) { /* can't read low */
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
+ /* allow {,n} as {0,n} */
+ low = 0;
+ non_low = 1;
+ }
+ else
+ goto invalid;
+ }
if (PEND) goto invalid;
PFETCH(c);
if (c == ',') {
UChar* prev = p;
- up = regex_scan_unsigned_number(&p, end, env->enc);
- if (up < 0) return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- if (up > REG_MAX_REPEAT_NUM)
- return REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
-
- if (p == prev) up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ up = onig_scan_unsigned_number(&p, end, env->enc);
+ if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ if (up > ONIG_MAX_REPEAT_NUM)
+ return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+ if (p == prev) {
+ if (non_low != 0)
+ goto invalid;
+ up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ }
}
else {
+ if (non_low != 0)
+ goto invalid;
+
PUNFETCH;
up = low; /* {n} : exact n times */
}
if (PEND) goto invalid;
PFETCH(c);
- if (IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_INTERVAL)) {
- if (c != '\\') goto invalid;
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
+ if (c != MC_ESC) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
if (!IS_REPEAT_INFINITE(up) && low > up) {
- return REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+ return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
}
tok->type = TK_INTERVAL;
@@ -2165,7 +2224,7 @@ fetch_range_qualifier(UChar** src, UChar* end, RegToken* tok, ScanEnv* env)
if (syn_allow)
return 1; /* OK */
else
- return REGERR_INVALID_REPEAT_RANGE_PATTERN;
+ return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
}
/* \M-, \C-, \c, or \... */
@@ -2175,18 +2234,18 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
int c;
UChar* p = *src;
- if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
PFETCH(c);
switch (c) {
case 'M':
- if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_M_BAR_META)) {
- if (PEND) return REGERR_END_PATTERN_AT_META;
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
- if (c != '-') return REGERR_META_CODE_SYNTAX;
- if (PEND) return REGERR_END_PATTERN_AT_META;
+ if (c != '-') return ONIGERR_META_CODE_SYNTAX;
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
- if (c == '\\') {
+ if (c == MC_ESC) {
c = fetch_escaped_value(&p, end, env);
if (c < 0) return c;
}
@@ -2197,21 +2256,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
break;
case 'C':
- if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
- if (PEND) return REGERR_END_PATTERN_AT_CONTROL;
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
- if (c != '-') return REGERR_CONTROL_CODE_SYNTAX;
+ if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
goto control;
}
else
goto backslash;
case 'c':
- if (IS_SYNTAX_OP(env->syntax, REG_SYN_OP_ESC_C_CONTROL)) {
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
control:
- if (PEND) return REGERR_END_PATTERN_AT_CONTROL;
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
- if (c == '\\') {
+ if (c == MC_ESC) {
c = fetch_escaped_value(&p, end, env);
if (c < 0) return c;
}
@@ -2235,80 +2294,175 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
return c;
}
-static int fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env);
+static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
-#ifdef USE_NAMED_SUBEXP
+#ifdef USE_NAMED_GROUP
+/*
+ def: 0 -> define name (don't allow number name)
+ 1 -> reference name (allow number name)
+*/
static int
-fetch_name(UChar** src, UChar* end, UChar** name_end, ScanEnv* env)
+fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
{
- int len;
+ int r, len, is_num;
int c = 0;
+ UChar *name_end;
UChar *p = *src;
- while (!PEND) {
- *name_end = p;
+ name_end = end;
+ r = 0;
+ is_num = 0;
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
PFETCH(c);
- if (c == '>') break;
- else if (c == ')' || c == '\\' || c == '\0')
- return REGERR_INVALID_SUBEXP_NAME;
+ if (c == '>')
+ return ONIGERR_EMPTY_GROUP_NAME;
- len = mblen(env->enc, c);
- while (!PEND && len-- > 1) {
+ if (ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
+ if (ref == 1)
+ is_num = 1;
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ len = enc_len(env->enc, c);
+ while (!PEND && len-- > 1)
PFETCH(c);
+ }
+
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == '>' || c == ')') break;
+
+ len = enc_len(env->enc, c);
+ if (is_num == 1) {
+ if (! ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
+ if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && c != '_')
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ else
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ else {
+ if (len == 1) {
+ if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) &&
+ !ONIGENC_IS_CODE_DIGIT(env->enc, c) &&
+ c != '_') {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
}
+
+ while (!PEND && len-- > 1)
+ PFETCH(c);
+ }
+ if (c != '>') {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ else {
+ c = **src;
+ if (ONIGENC_IS_CODE_UPPER(env->enc, c))
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#else
+static int
+fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+{
+ int r, len;
+ int c = 0;
+ UChar *name_end;
+ UChar *p = *src;
+
+ r = 0;
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (enc_len(env->enc, c) > 1)
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+
+ if (c == '>' || c == ')') break;
+ if (! ONIGENC_IS_CODE_DIGIT(env->enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ if (c != '>') {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ err:
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
}
- if (c != '>') return REGERR_INVALID_SUBEXP_NAME;
- *src = p;
- return 0;
}
#endif
static void
CC_ESC_WARN(ScanEnv* env, UChar *c)
{
-#ifdef WARNING
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED) &&
- IS_SYNTAX_BV(env->syntax, REG_SYN_ESCAPE_IN_CC)) {
+ if (onig_warn == onig_null_warn) return ;
+
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
char buf[WARN_BUFSIZE];
- regex_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
"character class has '%s' without escape", c);
- WARNING(buf);
+ (*onig_warn)(buf);
}
-#endif
}
static void
CCEND_ESC_WARN(ScanEnv* env, UChar* c)
{
-#ifdef WARNING
- if (IS_SYNTAX_BV((env)->syntax, REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED)) {
+ if (onig_warn == onig_null_warn) return ;
+
+ if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
char buf[WARN_BUFSIZE];
- regex_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
(env)->pattern, (env)->pattern_end,
"regular expression has '%s' without escape", c);
- WARNING(buf);
+ (*onig_warn)(buf);
}
-#endif
}
static UChar*
-find_str_position(WCINT s[], int n, UChar* from, UChar* to, UChar **next,
- RegCharEncoding enc)
+find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
+ UChar **next, OnigEncoding enc)
{
int i;
- WCINT x;
+ OnigCodePoint x;
UChar *q;
UChar *p = from;
while (p < to) {
- x = mb2wc(p, to, enc);
- q = p + mblen(enc, *p);
+ x = ONIGENC_MBC_TO_CODE(enc, p, to);
+ q = p + enc_len(enc, *p);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
- x = mb2wc(q, to, enc);
+ x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += mblen(enc, *q);
+ q += enc_len(enc, *q);
}
if (i >= n) {
if (IS_NOT_NULL(next))
@@ -2322,11 +2476,11 @@ find_str_position(WCINT s[], int n, UChar* from, UChar* to, UChar **next,
}
static int
-str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to,
- WCINT bad, RegCharEncoding enc)
+str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
+ OnigCodePoint bad, OnigEncoding enc)
{
int i, in_esc;
- WCINT x;
+ OnigCodePoint x;
UChar *q;
UChar *p = from;
@@ -2334,24 +2488,24 @@ str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to,
while (p < to) {
if (in_esc) {
in_esc = 0;
- p += mblen(enc, *p);
+ p += enc_len(enc, *p);
}
else {
- x = mb2wc(p, to, enc);
- q = p + mblen(enc, *p);
+ x = ONIGENC_MBC_TO_CODE(enc, p, to);
+ q = p + enc_len(enc, *p);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
- x = mb2wc(q, to, enc);
+ x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += mblen(enc, *q);
+ q += enc_len(enc, *q);
}
if (i >= n) return 1;
- p += mblen(enc, *p);
+ p += enc_len(enc, *p);
}
else {
- x = mb2wc(p, to, enc);
+ x = ONIGENC_MBC_TO_CODE(enc, p, to);
if (x == bad) return 0;
- else if (x == '\\') in_esc = 1;
+ else if (x == MC_ESC) in_esc = 1;
p = q;
}
}
@@ -2360,10 +2514,10 @@ str_exist_check_with_esc(WCINT s[], int n, UChar* from, UChar* to,
}
static int
-fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
+fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int c, num;
- RegSyntaxType* syn = env->syntax;
+ OnigSyntaxType* syn = env->syntax;
UChar* prev;
UChar* p = *src;
@@ -2382,11 +2536,11 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (c == '-') {
tok->type = TK_CC_RANGE;
}
- else if (c == '\\') {
- if (! IS_SYNTAX_BV(syn, REG_SYN_ESCAPE_IN_CC))
+ else if (c == MC_ESC) {
+ if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
- if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
PFETCH(c);
tok->escaped = 1;
@@ -2417,31 +2571,41 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
break;
+ case 'p':
+ case 'P':
+ if (PPEEK == '{' &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+ }
+ break;
+
case 'x':
if (PEND) break;
prev = p;
- if (PPEEK == '{' && IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
- if (num < 0) return REGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND && IS_XDIGIT(*p) && p - prev >= 9)
- return REGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
if (p > prev + 1 && !PEND && PPEEK == '}') {
PINC;
- tok->type = TK_WC;
- tok->base = 16;
- tok->u.wc = (WCINT )num;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
}
else {
/* can't read nothing or invalid format */
p = prev;
}
}
- else if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_HEX2)) {
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2455,9 +2619,9 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (PEND) break;
prev = p;
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_ESC_U_HEX4)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2469,11 +2633,11 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '0':
case '1': case '2': case '3': case '4': case '5': case '6': case '7':
- if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_OCTAL3)) {
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
PUNFETCH;
prev = p;
num = scan_unsigned_octal_number(&p, end, 3, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2495,11 +2659,12 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (c == '[') {
- if (IS_SYNTAX_OP(syn, REG_SYN_OP_POSIX_BRACKET) && PPEEK == ':') {
- WCINT send[] = { (WCINT )':', (WCINT )']' };
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && PPEEK == ':') {
+ OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
tok->backp = p; /* point at '[' is readed */
PINC;
- if (str_exist_check_with_esc(send, 2, p, end, (WCINT )']', env->enc)) {
+ if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']',
+ env->enc)) {
tok->type = TK_POSIX_BRACKET_OPEN;
}
else {
@@ -2509,7 +2674,7 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else {
cc_in_cc:
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_CCLASS_SET)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
tok->type = TK_CC_CC_OPEN;
}
else {
@@ -2518,7 +2683,8 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (c == '&') {
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_CCLASS_SET) && !PEND && PPEEK == '&') {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
+ !PEND && PPEEK == '&') {
PINC;
tok->type = TK_CC_AND;
}
@@ -2530,10 +2696,10 @@ fetch_token_in_cc(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
static int
-fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
+fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, c, num;
- RegSyntaxType* syn = env->syntax;
+ OnigSyntaxType* syn = env->syntax;
UChar* prev;
UChar* p = *src;
@@ -2546,15 +2712,15 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_BYTE;
tok->base = 0;
PFETCH(c);
- if (c == '\\') {
- if (PEND) return REGERR_END_PATTERN_AT_BACKSLASH;
+ if (c == MC_ESC) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
PFETCH(c);
tok->u.c = c;
tok->escaped = 1;
switch (c) {
case '*':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_0INF)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -2562,7 +2728,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '+':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_1INF)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 1;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -2570,20 +2736,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '?':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_01)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
greedy_check:
- if (!PEND && PPEEK == '?' && IS_SYNTAX_OP(syn, REG_SYN_OP_NON_GREEDY)) {
+ if (!PEND && PPEEK == '?' &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
PFETCH(c);
tok->u.repeat.greedy = 0;
tok->u.repeat.possessive = 0;
}
else if (!PEND && PPEEK == '+' &&
- ((IS_SYNTAX_OP2(syn, REG_SYN_OP2_POSSESSIVE_REPEAT) &&
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
tok->type != TK_INTERVAL) ||
- (IS_SYNTAX_OP2(syn, REG_SYN_OP2_POSSESSIVE_INTERVAL) &&
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
tok->type == TK_INTERVAL))) {
PFETCH(c);
tok->u.repeat.greedy = 1;
@@ -2596,7 +2763,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '{':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_INTERVAL)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
tok->backp = p;
r = fetch_range_qualifier(&p, end, tok, env);
if (r < 0) return r; /* error */
@@ -2608,115 +2775,115 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '|':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_ALT)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
tok->type = TK_ALT;
break;
case '(':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_SUBEXP)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
tok->type = TK_SUBEXP_OPEN;
break;
case ')':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_SUBEXP)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
tok->type = TK_SUBEXP_CLOSE;
break;
case 'w':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_WORD;
break;
case 'W':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_NOT_WORD;
break;
case 'b':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BOUND)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
tok->u.anchor = ANCHOR_WORD_BOUND;
break;
case 'B':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BOUND)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
break;
#ifdef USE_WORD_BEGIN_END
case '<':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BEGIN_END)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
tok->type = TK_ANCHOR;
tok->u.anchor = ANCHOR_WORD_BEGIN;
break;
case '>':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WORD_BEGIN_END)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
tok->type = TK_ANCHOR;
tok->u.anchor = ANCHOR_WORD_END;
break;
#endif
case 's':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WHITE_SPACE)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_WHITE_SPACE;
break;
case 'S':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_WHITE_SPACE)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
break;
case 'd':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_DIGIT)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_DIGIT;
break;
case 'D':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_DIGIT)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_NOT_DIGIT;
break;
case 'A':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
begin_buf:
tok->type = TK_ANCHOR;
tok->u.subtype = ANCHOR_BEGIN_BUF;
break;
case 'Z':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = ANCHOR_SEMI_END_BUF;
break;
case 'z':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
end_buf:
tok->type = TK_ANCHOR;
tok->u.subtype = ANCHOR_END_BUF;
break;
case 'G':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = ANCHOR_BEGIN_POSITION;
break;
case '`':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_GNU_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
goto begin_buf;
break;
case '\'':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_GNU_BUF_ANCHOR)) break;
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
goto end_buf;
break;
@@ -2724,26 +2891,26 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (PEND) break;
prev = p;
- if (PPEEK == '{' && IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
- if (num < 0) return REGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND && IS_XDIGIT(*p) && p - prev >= 9)
- return REGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
if (p > prev + 1 && !PEND && PPEEK == '}') {
PINC;
- tok->type = TK_WC;
- tok->u.wc = (WCINT )num;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
}
else {
/* can't read nothing or invalid format */
p = prev;
}
}
- else if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_X_HEX2)) {
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2757,9 +2924,9 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (PEND) break;
prev = p;
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_ESC_U_HEX4)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2773,20 +2940,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '5': case '6': case '7': case '8': case '9':
PUNFETCH;
prev = p;
- num = regex_scan_unsigned_number(&p, end, env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
- if (num > REG_MAX_BACKREF_NUM) return REGERR_TOO_BIG_BACKREF_NUMBER;
+ num = onig_scan_unsigned_number(&p, end, env->enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num > ONIG_MAX_BACKREF_NUM) return ONIGERR_TOO_BIG_BACKREF_NUMBER;
- if (IS_SYNTAX_OP(syn, REG_SYN_OP_BACK_REF) &&
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
(num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
- if (IS_SYNTAX_BV(syn, REG_SYN_STRICT_CHECK_BACKREF)) {
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
- return REGERR_INVALID_BACKREF;
+ return ONIGERR_INVALID_BACKREF;
}
tok->type = TK_BACKREF;
- tok->u.backref.num = 1;
- tok->u.backref.ref1 = num;
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = num;
+ tok->u.backref.by_name = 0;
break;
}
else if (c == '8' || c == '9') {
@@ -2798,10 +2966,10 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
p = prev;
/* fall through */
case '0':
- if (IS_SYNTAX_OP(syn, REG_SYN_OP_ESC_OCTAL3)) {
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
prev = p;
num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), env->enc);
- if (num < 0) return REGERR_TOO_BIG_NUMBER;
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -2814,33 +2982,34 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
break;
-#ifdef USE_NAMED_SUBEXP
+#ifdef USE_NAMED_GROUP
case 'k':
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_NAMED_SUBEXP)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
PFETCH(c);
if (c == '<') {
UChar* name_end;
int* backs;
prev = p;
- r = fetch_name(&p, end, &name_end, env);
+ r = fetch_name(&p, end, &name_end, env, 1);
if (r < 0) return r;
- num = regex_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
if (num <= 0) {
- regex_scan_env_set_error_string(env,
- REGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
- return REGERR_UNDEFINED_NAME_REFERENCE;
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
- if (IS_SYNTAX_BV(syn, REG_SYN_STRICT_CHECK_BACKREF)) {
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
int i;
for (i = 0; i < num; i++) {
if (backs[i] > env->num_mem ||
IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
- return REGERR_INVALID_BACKREF;
+ return ONIGERR_INVALID_BACKREF;
}
}
tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 1;
if (num == 1) {
tok->u.backref.num = 1;
tok->u.backref.ref1 = backs[0];
@@ -2858,13 +3027,13 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_SUBEXP_CALL
case 'g':
- if (IS_SYNTAX_OP2(syn, REG_SYN_OP2_SUBEXP_CALL)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
if (c == '<') {
UChar* name_end;
prev = p;
- r = fetch_name(&p, end, &name_end, env);
+ r = fetch_name(&p, end, &name_end, env, 1);
if (r < 0) return r;
tok->type = TK_CALL;
@@ -2878,11 +3047,21 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
case 'Q':
- if (IS_SYNTAX_OP(syn, REG_SYN_OP_QUOTE)) {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
tok->type = TK_QUOTE_OPEN;
}
break;
+ case 'p':
+ case 'P':
+ if (PPEEK == '{' &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+ }
+ break;
+
default:
PUNFETCH;
num = fetch_escaped_value(&p, end, env);
@@ -2899,14 +3078,34 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.c = c;
tok->escaped = 0;
+#ifdef USE_VARIABLE_META_CHARS
+ if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
+ if (c == MC_ANYCHAR)
+ goto any_char;
+ else if (c == MC_ANYTIME)
+ goto anytime;
+ else if (c == MC_ZERO_OR_ONE_TIME)
+ goto zero_or_one_time;
+ else if (c == MC_ONE_OR_MORE_TIME)
+ goto one_or_more_time;
+ else if (c == MC_ANYCHAR_ANYTIME) {
+ tok->type = TK_ANYCHAR_ANYTIME;
+ goto out;
+ }
+ }
+#endif
+
switch (c) {
case '.':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ANYCHAR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+ any_char:
tok->type = TK_ANYCHAR;
break;
case '*':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_0INF)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+ anytime:
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -2914,7 +3113,8 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '+':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_1INF)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+ one_or_more_time:
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 1;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -2922,7 +3122,8 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '?':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_01)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+ zero_or_one_time:
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
@@ -2930,7 +3131,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '{':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_INTERVAL)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
tok->backp = p;
r = fetch_range_qualifier(&p, end, tok, env);
if (r < 0) return r; /* error */
@@ -2942,36 +3143,36 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '|':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_ALT)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
tok->type = TK_ALT;
break;
case '(':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_SUBEXP)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
tok->type = TK_SUBEXP_OPEN;
break;
case ')':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_SUBEXP)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
tok->type = TK_SUBEXP_CLOSE;
break;
case '^':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_LINE_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->option)
? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
break;
case '$':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_LINE_ANCHOR)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->option)
? ANCHOR_END_BUF : ANCHOR_END_LINE);
break;
case '[':
- if (! IS_SYNTAX_OP(syn, REG_SYN_OP_CC)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
tok->type = TK_CC_OPEN;
break;
@@ -2984,7 +3185,7 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_EXTEND(env->option)) {
while (!PEND) {
PFETCH(c);
- if (IS_NEWLINE(c))
+ if (ONIG_IS_NEWLINE(c))
break;
}
goto start;
@@ -3002,31 +3203,182 @@ fetch_token(RegToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
+ out:
*src = p;
return tok->type;
}
-static void
-bitset_by_pred_func(BitSetRef bs, int (*pf)(RegCharEncoding, UChar),
- RegCharEncoding code, int not)
+static int
+add_ctype_to_cc_by_list(CClassNode* cc, int ctype, int not,
+ OnigEncoding enc)
{
- int c;
+ int i, j, r, nsb, nmb;
+ OnigCodePointRange *sbr, *mbr;
+
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
+ if (r != 0) return r;
- if (not) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! pf(code, (UChar )c)) BITSET_SET_BIT(bs, c);
+ if (not == 0) {
+ for (i = 0; i < nsb; i++) {
+ for (j = sbr[i].from; j <= sbr[i].to; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ }
+ for (i = 0; i < nmb; i++) {
+ r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
+ if (r != 0) return r;
}
}
else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (pf(code, (UChar )c)) BITSET_SET_BIT(bs, c);
+ OnigCodePoint prev = 0;
+ for (i = 0; i < nsb; i++) {
+ for (j = prev; j < sbr[i].from; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ prev = sbr[i].to + 1;
+ }
+ if (prev < 0x7f) {
+ for (j = prev; j < 0x7f; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ }
+
+ prev = 0x80;
+ for (i = 0; i < nmb; i++) {
+ if (prev < mbr[i].from) {
+ r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
+ if (r != 0) return r;
+ }
+ prev = mbr[i].to + 1;
+ }
+ if (prev < 0x7fffffff) {
+ r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
+ if (r != 0) return r;
}
}
+
+ return r;
+}
+
+static int
+add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
+{
+ int c, r;
+ OnigEncoding enc = env->enc;
+
+ if (ONIGENC_CTYPE_SUPPORT_LEVEL(enc) != ONIGENC_CTYPE_SUPPORT_LEVEL_SB) {
+ r = add_ctype_to_cc_by_list(cc, ctype, not, env->enc);
+ return r;
+ }
+
+ r = 0;
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ case ONIGENC_CTYPE_BLANK:
+ case ONIGENC_CTYPE_CNTRL:
+ case ONIGENC_CTYPE_DIGIT:
+ case ONIGENC_CTYPE_LOWER:
+ case ONIGENC_CTYPE_PUNCT:
+ case ONIGENC_CTYPE_SPACE:
+ case ONIGENC_CTYPE_UPPER:
+ case ONIGENC_CTYPE_XDIGIT:
+ case ONIGENC_CTYPE_ASCII:
+ case ONIGENC_CTYPE_ALNUM:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ break;
+
+ case ONIGENC_CTYPE_GRAPH:
+ case ONIGENC_CTYPE_PRINT:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ break;
+
+ case ONIGENC_CTYPE_WORD:
+ if (not == 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_SB_WORD(enc, c) && ! ONIGENC_IS_MBC_HEAD(enc, c))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ break;
+
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+
+ return r;
+}
+
+static int
+parse_ctype_to_enc_ctype(int pctype, int* not)
+{
+ int ctype;
+
+ switch (pctype) {
+ case CTYPE_WORD:
+ ctype = ONIGENC_CTYPE_WORD;
+ *not = 0;
+ break;
+ case CTYPE_NOT_WORD:
+ ctype = ONIGENC_CTYPE_WORD;
+ *not = 1;
+ break;
+ case CTYPE_WHITE_SPACE:
+ ctype = ONIGENC_CTYPE_SPACE;
+ *not = 0;
+ break;
+ case CTYPE_NOT_WHITE_SPACE:
+ ctype = ONIGENC_CTYPE_SPACE;
+ *not = 1;
+ break;
+ case CTYPE_DIGIT:
+ ctype = ONIGENC_CTYPE_DIGIT;
+ *not = 0;
+ break;
+ case CTYPE_NOT_DIGIT:
+ ctype = ONIGENC_CTYPE_DIGIT;
+ *not = 1;
+ break;
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+ return ctype;
}
typedef struct {
- UChar *name;
- int (*pf)(RegCharEncoding, UChar);
+ UChar *name;
+ int ctype;
short int len;
} PosixBracketEntryType;
@@ -3037,24 +3389,24 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
#define POSIX_BRACKET_NAME_MAX_LEN 6
static PosixBracketEntryType PBS[] = {
- { "alnum", is_code_alnum, 5 },
- { "alpha", is_code_alpha, 5 },
- { "blank", is_code_blank, 5 },
- { "cntrl", is_code_cntrl, 5 },
- { "digit", is_code_digit, 5 },
- { "graph", is_code_graph, 5 },
- { "lower", is_code_lower, 5 },
- { "print", is_code_print, 5 },
- { "punct", is_code_punct, 5 },
- { "space", is_code_space, 5 },
- { "upper", is_code_upper, 5 },
- { "xdigit", is_code_xdigit, 6 },
- { "ascii", is_code_ascii, 5 }, /* I don't know origin. Perl? */
- { (UChar* )NULL, is_code_alnum, 0 }
+ { "alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { "alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { "blank", ONIGENC_CTYPE_BLANK, 5 },
+ { "cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { "digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { "graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { "lower", ONIGENC_CTYPE_LOWER, 5 },
+ { "print", ONIGENC_CTYPE_PRINT, 5 },
+ { "punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { "space", ONIGENC_CTYPE_SPACE, 5 },
+ { "upper", ONIGENC_CTYPE_UPPER, 5 },
+ { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */
+ { (UChar* )NULL, -1, 0 }
};
PosixBracketEntryType *pb;
- int not, i, c;
+ int not, i, c, r;
UChar *p = *src;
if (PPEEK == '^') {
@@ -3068,12 +3420,14 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
goto not_posix_bracket;
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (k_strncmp(p, pb->name, pb->len) == 0) {
+ if (onig_strncmp(p, pb->name, pb->len) == 0) {
p += pb->len;
if (end - p < 2 || *p != ':' || *(p+1) != ']')
- return REGERR_INVALID_POSIX_BRACKET_TYPE;
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+
+ r = add_ctype_to_cc(cc, pb->ctype, not, env);
+ if (r != 0) return r;
- bitset_by_pred_func(cc->bs, pb->pf, env->enc, not);
PINC; PINC;
*src = p;
return 0;
@@ -3092,13 +3446,89 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
if (!PEND) {
PFETCH(c);
if (c == ']')
- return REGERR_INVALID_POSIX_BRACKET_TYPE;
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
}
}
return 1; /* 1: is not POSIX bracket, but no error. */
}
+static int
+property_name_to_ctype(UChar* p, UChar* end)
+{
+ static PosixBracketEntryType PBS[] = {
+ { "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { "Alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { "Blank", ONIGENC_CTYPE_BLANK, 5 },
+ { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { "Digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { "Graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { "Lower", ONIGENC_CTYPE_LOWER, 5 },
+ { "Print", ONIGENC_CTYPE_PRINT, 5 },
+ { "Punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { "Space", ONIGENC_CTYPE_SPACE, 5 },
+ { "Upper", ONIGENC_CTYPE_UPPER, 5 },
+ { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { "ASCII", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )NULL, -1, 0 }
+ };
+
+ PosixBracketEntryType *pb;
+ int len;
+
+ len = end - p;
+ for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ if (len == pb->len && onig_strncmp(p, pb->name, pb->len) == 0)
+ return pb->ctype;
+ }
+
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+static int
+fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
+{
+ int ctype;
+ UChar *prev, *p = *src;
+ int c = 0;
+
+ while (!PEND) {
+ prev = p;
+ PFETCH(c);
+ if (c == '}') {
+ ctype = property_name_to_ctype(*src, prev);
+ if (ctype < 0) return ctype;
+
+ *src = p;
+ return ctype;
+ }
+ else if (c == '(' || c == ')' || c == '{' || c == '|')
+ break;
+ }
+
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+static int
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
+ ScanEnv* env)
+{
+ int r, ctype;
+ CClassNode* cc;
+
+ ctype = fetch_char_property_to_ctype(src, end, env);
+ if (ctype < 0) return ctype;
+
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ cc = &(NCCLASS(*np));
+ r = add_ctype_to_cc(cc, ctype, 0, env);
+ if (r != 0) return r;
+ if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
+
+ return 0;
+}
+
enum CCSTATE {
CCS_VALUE,
@@ -3109,79 +3539,36 @@ enum CCSTATE {
enum CCVALTYPE {
CCV_SB,
- CCV_WC,
+ CCV_CODE_POINT,
CCV_CLASS
};
static int
-next_state_class(CClassNode* cc, RegToken* tok, WCINT* vs,
- enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env)
+next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+ enum CCSTATE* state, ScanEnv* env)
{
- int r, c;
+ int r;
if (*state == CCS_RANGE)
- return REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
+ return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
if (*state == CCS_VALUE && *type != CCV_CLASS) {
if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs));
- else if (*type == CCV_WC) {
- r = add_wc_range(&(cc->mbuf), env, *vs, *vs);
+ else if (*type == CCV_CODE_POINT) {
+ r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
}
}
- if (tok->type == TK_CHAR_TYPE) {
- switch (tok->u.subtype) {
- case CTYPE_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_WORD(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf);
- break;
- case CTYPE_NOT_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_CODE_WORD(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- break;
- case CTYPE_WHITE_SPACE:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- break;
- case CTYPE_NOT_WHITE_SPACE:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf);
- break;
- case CTYPE_DIGIT:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- break;
- case CTYPE_NOT_DIGIT:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(env->enc, cc->mbuf);
- break;
- default:
- return REGERR_PARSER_BUG;
- break;
- }
- }
- else { /* TK_POSIX_BRACKET_OPEN */
- /* nothing */
- }
-
*state = CCS_VALUE;
*type = CCV_CLASS;
return 0;
}
static int
-next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw,
+next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+ int* vs_israw, int v_israw,
enum CCVALTYPE intype, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
@@ -3191,8 +3578,8 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw,
case CCS_VALUE:
if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs));
- else if (*type == CCV_WC) {
- r = add_wc_range(&(cc->mbuf), env, *vs, *vs);
+ else if (*type == CCV_CODE_POINT) {
+ r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
}
break;
@@ -3200,55 +3587,28 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
- if (IS_IGNORECASE(env->option) && (*vs_israw == 0 && v_israw == 0)) {
- int low, high;
-
- low = TOLOWER(env->enc, *vs);
- high = TOLOWER(env->enc, v);
- if (low > high) {
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return REGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
-
- if (low < 'A' && high >= 'a' && high <= 'z') {
- bitset_set_range(cc->bs, low, (int )'A' - 1);
- bitset_set_range(cc->bs, (int )'a', high);
- }
- else if (high > 'z' && low >= 'a' && low <= 'z') {
- bitset_set_range(cc->bs, low, (int )'z');
- bitset_set_range(cc->bs, (int )'z' + 1, high);
- }
- else {
- bitset_set_range(cc->bs, low, high);
- }
- }
- else {
- if (*vs > v) {
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return REGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
- bitset_set_range(cc->bs, (int )*vs, (int )v);
+ if (*vs > v) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
+ bitset_set_range(cc->bs, (int )*vs, (int )v);
}
else {
- r = add_wc_range(&(cc->mbuf), env, *vs, v);
+ r = add_code_range(&(cc->mbuf), env, *vs, v);
if (r < 0) return r;
}
}
else {
-#ifndef REG_RUBY_M17N
- if (env->enc == REGCODE_UTF8 && intype == CCV_WC && *type == CCV_SB) {
+ if (intype == CCV_CODE_POINT && *type == CCV_SB &&
+ ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) {
bitset_set_range(cc->bs, (int )*vs, 0x7f);
- r = add_wc_range(&(cc->mbuf), env, (WCINT )0x80, v);
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
if (r < 0) return r;
}
else
-#endif
- return REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
+ return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
}
ccs_range_end:
*state = CCS_COMPLETE;
@@ -3271,7 +3631,7 @@ next_state_val(CClassNode* cc, WCINT *vs, WCINT v, int* vs_israw, int v_israw,
static int
char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped,
- RegCharEncoding enc)
+ OnigEncoding enc)
{
int in_esc;
UChar* p = from;
@@ -3283,19 +3643,19 @@ char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped,
}
else {
if (*p == c) return 1;
- if (*p == '\\') in_esc = 1;
+ if (*p == MC_ESC) in_esc = 1;
}
- p += mblen(enc, *p);
+ p += enc_len(enc, *p);
}
return 0;
}
static int
-parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
+parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
ScanEnv* env)
{
int r, neg, len, fetched, and_start;
- WCINT v, vs;
+ OnigCodePoint v, vs;
UChar *p;
Node* node;
CClassNode *cc, *prev_cc;
@@ -3305,9 +3665,10 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;
+ prev_cc = (CClassNode* )NULL;
*np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
- if (r == TK_BYTE && tok->u.c == '^') {
+ if (r == TK_BYTE && tok->u.c == '^' && tok->escaped == 0) {
neg = 1;
r = fetch_token_in_cc(tok, src, end, env);
}
@@ -3318,16 +3679,15 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
if (r < 0) return r;
if (r == TK_CC_CLOSE) {
if (! char_exist_check(']', *src, env->pattern_end, 1, env->enc))
- return REGERR_EMPTY_CHAR_CLASS;
+ return ONIGERR_EMPTY_CHAR_CLASS;
CC_ESC_WARN(env, "]");
r = tok->type = TK_BYTE; /* allow []...] */
}
*np = node = node_new_cclass();
- CHECK_NULL_RETURN_VAL(node, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
cc = &(NCCLASS(node));
- prev_cc = (CClassNode* )NULL;
and_start = 0;
state = CCS_START;
@@ -3336,32 +3696,34 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
fetched = 0;
switch (r) {
case TK_BYTE:
- len = mblen(env->enc, tok->u.c);
+ len = enc_len(env->enc, tok->u.c);
if (len > 1) {
PUNFETCH;
- v = MB2WC(p, end, env->enc);
+ v = ONIGENC_MBC_TO_CODE(env->enc, p, end);
p += len;
+ in_type = CCV_CODE_POINT;
}
else {
sb_char:
- v = (WCINT )tok->u.c;
+ v = (OnigCodePoint )tok->u.c;
+ in_type = CCV_SB;
}
in_israw = 0;
- goto val_entry;
+ goto val_entry2;
break;
case TK_RAW_BYTE:
- len = mblen(env->enc, tok->u.c);
+ len = enc_len(env->enc, tok->u.c);
if (len > 1 && tok->base != 0) { /* tok->base != 0 : octal or hexadec. */
- UChar buf[WC2MB_MAX_BUFLEN];
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
UChar* bufp = buf;
- UChar* bufe = buf + WC2MB_MAX_BUFLEN;
+ UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
int i, base = tok->base;
- if (len > WC2MB_MAX_BUFLEN) {
+ if (len > ONIGENC_CODE_TO_MBC_MAXLEN) {
bufp = (UChar* )xmalloc(len);
if (IS_NULL(bufp)) {
- r = REGERR_MEMORY;
+ r = ONIGERR_MEMORY;
goto err;
}
bufe = bufp + len;
@@ -3374,27 +3736,34 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
bufp[i] = tok->u.c;
}
if (i < len) {
- r = REGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
raw_byte_err:
if (bufp != buf) xfree(bufp);
goto err;
}
- v = MB2WC(bufp, bufe, env->enc);
- fetched = 1;
+ v = ONIGENC_MBC_TO_CODE(env->enc, bufp, bufe);
if (bufp != buf) xfree(bufp);
+ in_type = CCV_CODE_POINT;
}
else {
- v = (WCINT )tok->u.c;
+ v = (OnigCodePoint )tok->u.c;
+ in_type = CCV_SB;
}
in_israw = 1;
- goto val_entry;
+ goto val_entry2;
break;
- case TK_WC:
- v = tok->u.wc;
+ case TK_CODE_POINT:
+ v = tok->u.code;
in_israw = 1;
val_entry:
- in_type = (v < SINGLE_BYTE_SIZE ? CCV_SB : CCV_WC);
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
+ if (len < 0) {
+ r = len;
+ goto err;
+ }
+ in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
+ val_entry2:
r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
&state, env);
if (r != 0) goto err;
@@ -3406,16 +3775,38 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, "[");
p = tok->backp;
- v = (WCINT )tok->u.c;
+ v = (OnigCodePoint )tok->u.c;
in_israw = 0;
goto val_entry;
}
- /* POSIX bracket fall */
+ goto next_class;
+ break;
+
case TK_CHAR_TYPE:
- r = next_state_class(cc, tok, &vs, &val_type, &state, env);
+ {
+ int ctype, not;
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ r = add_ctype_to_cc(cc, ctype, not, env);
+ if (r != 0) return r;
+ }
+
+ next_class:
+ r = next_state_class(cc, &vs, &val_type, &state, env);
if (r != 0) goto err;
break;
+ case TK_CHAR_PROPERTY:
+ {
+ int ctype;
+
+ ctype = fetch_char_property_to_ctype(&p, end, env);
+ if (ctype < 0) return ctype;
+ r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
+ if (r != 0) return r;
+ goto next_class;
+ }
+ break;
+
case TK_CC_RANGE:
if (state == CCS_VALUE) {
r = fetch_token_in_cc(tok, &p, end, env);
@@ -3423,7 +3814,7 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
fetched = 1;
if (r == TK_CC_CLOSE) { /* allow [x-] */
range_end_val:
- v = (WCINT )'-';
+ v = (OnigCodePoint )'-';
in_israw = 0;
goto val_entry;
}
@@ -3435,7 +3826,7 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
}
else if (state == CCS_START) {
/* [-xa] is allowed */
- v = (WCINT )tok->u.c;
+ v = (OnigCodePoint )tok->u.c;
in_israw = 0;
r = fetch_token_in_cc(tok, &p, end, env);
@@ -3461,11 +3852,11 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
goto range_end_val;
}
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_RANGE_OP_IN_CC)) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
CC_ESC_WARN(env, "-");
goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
}
- r = REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+ r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err;
}
break;
@@ -3478,10 +3869,10 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
r = parse_char_class(&anode, tok, &p, end, env);
if (r != 0) goto cc_open_err;
acc = &(NCCLASS(anode));
- r = or_cclass(cc, acc);
+ r = or_cclass(cc, acc, env->enc);
+ onig_node_free(anode);
cc_open_err:
- regex_node_free(anode);
if (r != 0) goto err;
}
break;
@@ -3498,8 +3889,9 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
state = CCS_START;
if (IS_NOT_NULL(prev_cc)) {
- r = and_cclass(prev_cc, cc);
+ r = and_cclass(prev_cc, cc, env->enc);
if (r != 0) goto err;
+ bbuf_free(cc->mbuf);
}
else {
prev_cc = cc;
@@ -3510,11 +3902,11 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
break;
case TK_EOT:
- r = REGERR_PREMATURE_END_OF_CHAR_CLASS;
+ r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
goto err;
break;
default:
- r = REGERR_PARSER_BUG;
+ r = ONIGERR_PARSER_BUG;
goto err;
break;
}
@@ -3534,55 +3926,60 @@ parse_char_class(Node** np, RegToken* tok, UChar** src, UChar* end,
}
if (IS_NOT_NULL(prev_cc)) {
- r = and_cclass(prev_cc, cc);
+ r = and_cclass(prev_cc, cc, env->enc);
if (r != 0) goto err;
+ bbuf_free(cc->mbuf);
cc = prev_cc;
}
cc->not = neg;
if (cc->not != 0 &&
- IS_SYNTAX_BV(env->syntax, REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
if (is_empty != 0)
BITSET_IS_EMPTY(cc->bs, is_empty);
if (is_empty == 0)
- BITSET_SET_BIT(cc->bs, NEWLINE);
+ BITSET_SET_BIT(cc->bs, ONIG_NEWLINE);
}
*src = p;
return 0;
err:
- regex_node_free(*np);
+ if (cc != &(NCCLASS(*np)))
+ bbuf_free(cc->mbuf);
+ onig_node_free(*np);
return r;
}
-static int parse_subexp(Node** top, RegToken* tok, int term,
+static int parse_subexp(Node** top, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env);
static int
-parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end,
+parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
{
Node *target;
- RegOptionType option;
+ OnigOptionType option;
int r, c, num;
+ int list_capture;
UChar* p = *src;
*np = NULL;
- if (PEND) return REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
option = env->option;
- if (PPEEK == '?' && IS_SYNTAX_OP(env->syntax, REG_SYN_OP_SUBEXP_EFFECT)) {
+ if (PPEEK == '?' &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
PINC;
- if (PEND) return REGERR_END_PATTERN_IN_GROUP;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
switch (c) {
case '#': /* (?#...) comment */
while (1) {
- if (PEND) return REGERR_END_PATTERN_IN_GROUP;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
if (c == ')') break;
}
@@ -3590,15 +3987,21 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end,
return 3; /* 3: comment */
break;
- case ':': /* (?:...) grouping only */
- goto group;
+ case ':': /* (?:...) grouping only */
+ group:
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(np, tok, term, &p, end, env);
+ if (r < 0) return r;
+ *src = p;
+ return 1; /* group */
break;
case '=':
- *np = regex_node_new_anchor(ANCHOR_PREC_READ);
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ);
break;
case '!': /* preceding read */
- *np = regex_node_new_anchor(ANCHOR_PREC_READ_NOT);
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
break;
case '>': /* (?>...) stop backtrack */
*np = node_new_effect(EFFECT_STOP_BACKTRACK);
@@ -3607,29 +4010,70 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end,
case '<': /* look behind (?<=...), (?<!...) */
PFETCH(c);
if (c == '=')
- *np = regex_node_new_anchor(ANCHOR_LOOK_BEHIND);
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
else if (c == '!')
- *np = regex_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
-#ifdef USE_NAMED_SUBEXP
- else if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_NAMED_SUBEXP)) {
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
+#ifdef USE_NAMED_GROUP
+ else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
UChar *name_end;
+
PUNFETCH;
+ list_capture = 0;
+
+ named_group:
name = p;
- r = fetch_name(&p, end, &name_end, env);
+ r = fetch_name(&p, end, &name_end, env, 0);
if (r < 0) return r;
- *np = node_new_effect(EFFECT_MEMORY);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
- NEFFECT(*np).regnum = num;
- r = name_add(env->reg, name, name_end, num);
+ if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+ r = name_add(env->reg, name, name_end, num, env);
if (r != 0) return r;
+ *np = node_new_effect_memory(env->option, 1);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).regnum = num;
+ if (list_capture != 0)
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ env->num_named++;
}
#endif
else
- return REGERR_UNDEFINED_GROUP_OPTION;
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+
+ case '@':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
+#ifdef USE_NAMED_GROUP
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ PFETCH(c);
+ if (c == '<') {
+ list_capture = 1;
+ goto named_group; /* (?@<name>...) */
+ }
+ PUNFETCH;
+ }
+#endif
+ *np = node_new_effect_memory(env->option, 0);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) {
+ onig_node_free(*np);
+ return num;
+ }
+ else if (num >= BIT_STATUS_BITS_NUM) {
+ onig_node_free(*np);
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+ }
+ NEFFECT(*np).regnum = num;
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
break;
#ifdef USE_POSIXLINE_OPTION
@@ -3646,100 +4090,80 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end,
break;
case '-': neg = 1; break;
- case 'x': ONOFF(option, REG_OPTION_EXTEND, neg); break;
- case 'i': ONOFF(option, REG_OPTION_IGNORECASE, neg); break;
+ case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
+ case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
case 's':
- if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, REG_OPTION_MULTILINE, neg);
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+ ONOFF(option, ONIG_OPTION_MULTILINE, neg);
}
else
- return REGERR_UNDEFINED_GROUP_OPTION;
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
case 'm':
- if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, REG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+ ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
}
- else if (IS_SYNTAX_OP2(env->syntax, REG_SYN_OP2_OPTION_RUBY)) {
- ONOFF(option, REG_OPTION_MULTILINE, neg);
+ else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+ ONOFF(option, ONIG_OPTION_MULTILINE, neg);
}
else
- return REGERR_UNDEFINED_GROUP_OPTION;
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
#ifdef USE_POSIXLINE_OPTION
case 'p':
- ONOFF(option, REG_OPTION_MULTILINE|REG_OPTION_SINGLELINE, neg);
+ ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
break;
#endif
default:
- return REGERR_UNDEFINED_GROUP_OPTION;
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
}
- if (c == ')') { /* option only */
- if (option == env->option) {
- *np = node_new_empty();
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- *src = p;
- return 0;
- }
- else {
- *np = node_new_option(option);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- *src = p;
- return 2; /* option only */
- }
+ if (c == ')') {
+ *np = node_new_option(option);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ *src = p;
+ return 2; /* option only */
}
else if (c == ':') {
- if (env->option == option) {
- group:
- r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
- r = parse_subexp(np, tok, term, &p, end, env);
- if (r < 0) return r;
- *src = p;
- return 1; /* group */
- }
- else {
- RegOptionType prev = env->option;
-
- env->option = option;
- r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env);
- env->option = prev;
- if (r < 0) return r;
- *np = node_new_option(option);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- NEFFECT(*np).target = target;
- *src = p;
- return 0;
- }
+ OnigOptionType prev = env->option;
+
+ env->option = option;
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&target, tok, term, &p, end, env);
+ env->option = prev;
+ if (r < 0) return r;
+ *np = node_new_option(option);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).target = target;
+ *src = p;
+ return 0;
}
- if (PEND) return REGERR_END_PATTERN_IN_GROUP;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
}
}
break;
default:
- return REGERR_UNDEFINED_GROUP_OPTION;
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
}
}
else {
-#ifdef USE_NAMED_SUBEXP
- if (IS_REG_OPTION_ON(env->option, REG_OPTION_CAPTURE_ONLY_NAMED_GROUP)) {
+#ifdef USE_NAMED_GROUP
+ if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
goto group;
- }
#endif
- *np = node_new_effect(EFFECT_MEMORY);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+ *np = node_new_effect_memory(env->option, 0);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
NEFFECT(*np).regnum = num;
}
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
@@ -3747,8 +4171,14 @@ parse_effect(Node** np, RegToken* tok, int term, UChar** src, UChar* end,
if (NTYPE(*np) == N_ANCHOR)
NANCHOR(*np).target = target;
- else
+ else {
NEFFECT(*np).target = target;
+ if (NEFFECT(*np).type == EFFECT_MEMORY) {
+ /* Don't move this to previous of parse_subexp() */
+ r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
+ if (r != 0) return r;
+ }
+ }
*src = p;
return 0;
@@ -3784,16 +4214,19 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
QualifierNode* qnt = &(NQUALIFIER(target));
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (qn->by_number == 0 && qnt->by_number == 0) {
+ if (qn->by_number == 0 && qnt->by_number == 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
if (IS_REPEAT_INFINITE(qn->upper)) {
if (qn->lower == 0) { /* '*' */
redundant:
{
char buf[WARN_BUFSIZE];
- regex_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- "redundant nested repeat operator");
- VERB_WARNING(buf);
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ "redundant nested repeat operator");
+ (*onig_verb_warn)(buf);
+ }
goto warn_exit;
}
}
@@ -3816,13 +4249,11 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
}
}
}
-#endif
-#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
warn_exit:
#endif
if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
- reduce_nested_qualifier(qnode, target);
+ onig_reduce_nested_qualifier(qnode, target);
goto q_exit;
}
}
@@ -3837,12 +4268,82 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
return 0;
}
+#ifdef USE_FOLD_MATCH
+static int
+make_alt_node_from_fold_info(OnigEncFoldMatchInfo* info, Node** node)
+{
+ int i;
+ UChar *s, *end;
+ Node *root, **ptail, *snode;
+
+ ptail = &root;
+ for (i = 0; i < info->target_num; i++) {
+ s = info->target_str[i];
+ end = s + info->target_byte_len[i];
+ /* ex.
+ U+00DF match "ss" and "SS, but not match "Ss".
+ So, string nodes must be raw.
+ */
+ snode = node_new_str_raw(s, end);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+ *ptail = node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+ ptail = &(NCONS(*ptail).right);
+ }
+ *ptail = NULL_NODE;
+ *node = root;
+ return 0;
+}
+
static int
-parse_exp(Node** np, RegToken* tok, int term,
+make_fold_alt_node_from_cc(OnigEncoding enc, CClassNode* cc, Node** root)
+{
+ int i, j, flen, len, ncode, n;
+ UChar *s, *end, buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ OnigCodePoint* codes;
+ Node **ptail, *snode;
+ OnigEncFoldMatchInfo* info;
+
+ *root = NULL_NODE;
+ ptail = root;
+
+ ncode = ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc, &codes);
+ n = 0;
+ for (i = 0; i < ncode; i++) {
+ if (onig_is_code_in_cc(enc, codes[i], cc)) {
+ len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
+ flen = ONIGENC_GET_FOLD_MATCH_INFO(enc, buf, buf + len, &info);
+ if (flen > 0) { /* fold */
+ for (j = 0; j < info->target_num; j++) {
+ s = info->target_str[j];
+ end = s + info->target_byte_len[j];
+ if (onig_strncmp(s, buf, enc_len(enc, *s)) == 0)
+ continue; /* ignore single char. */
+
+ snode = node_new_str_raw(s, end);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+ *ptail = node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+ ptail = &(NCONS(*ptail).right);
+ n++;
+ }
+ }
+ }
+ }
+
+ return n;
+}
+#endif
+
+static int
+parse_exp(Node** np, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
{
- int r, len, c, group = 0;
+ int r, len, group = 0;
Node* qn;
+ Node** targetp;
start:
*np = NULL;
@@ -3863,9 +4364,13 @@ parse_exp(Node** np, RegToken* tok, int term,
if (r == 1) group = 1;
else if (r == 2) { /* option only */
Node* target;
+ OnigOptionType prev = env->option;
+
+ env->option = NEFFECT(*np).option;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, src, end, env);
+ env->option = prev;
if (r < 0) return r;
NEFFECT(*np).target = target;
return tok->type;
@@ -3875,17 +4380,11 @@ parse_exp(Node** np, RegToken* tok, int term,
if (r < 0) return r;
goto start;
}
- else {
- if (NTYPE(*np) == N_EFFECT && NEFFECT(*np).type == EFFECT_MEMORY) {
- r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
- if (r != 0) return r;
- }
- }
break;
case TK_SUBEXP_CLOSE:
- if (! IS_SYNTAX_BV(env->syntax, REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
- return REGERR_UNMATCHED_CLOSE_PARENTHESIS;
+ if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
+ return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
if (tok->escaped) goto tk_raw_byte;
else goto tk_byte;
@@ -3893,58 +4392,124 @@ parse_exp(Node** np, RegToken* tok, int term,
case TK_BYTE:
tk_byte:
- *np = node_new_str_char((UChar )tok->u.c);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- len = mblen(env->enc, tok->u.c);
- if (len > 1) {
- regex_node_str_cat(*np, *src, *src + len - 1);
- *src += (len - 1);
- }
- while (1) {
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- if (r != TK_BYTE) goto repeat;
+ {
+ *np = node_new_str_char((UChar )tok->u.c);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- r = node_str_cat_char(*np, (UChar )tok->u.c);
- if (r < 0) return r;
- len = mblen(env->enc, tok->u.c);
- if (len > 1) {
- regex_node_str_cat(*np, *src, *src + len - 1);
- *src += (len - 1);
+ while (1) {
+ len = enc_len(env->enc, tok->u.c);
+ if (len > 1) {
+ r = onig_node_str_cat(*np, *src, *src + len - 1);
+ if (r < 0) return r;
+ *src += (len - 1);
+ }
+
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+ if (r != TK_BYTE) break;
+
+ r = node_str_cat_char(*np, (UChar )tok->u.c);
+ if (r < 0) return r;
}
+
+ fold_entry:
+#ifdef USE_FOLD_MATCH
+ if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
+ int flen, ret;
+ Node *root, **ptail, *work, *snode, *anode;
+ UChar *p, *pprev;
+ OnigEncFoldMatchInfo* fold_info;
+ StrNode* sn = &(NSTRING(*np));
+
+ ptail = &root;
+ pprev = sn->s;
+ for (p = sn->s; p < sn->end; ) {
+ flen = ONIGENC_GET_FOLD_MATCH_INFO(env->enc, p, sn->end, &fold_info);
+ if (flen > 0) { /* fold */
+ ret = make_alt_node_from_fold_info(fold_info, &anode);
+ if (ret != 0) return ret;
+ work = node_new_list(anode, NULL);
+ CHECK_NULL_RETURN_VAL(work, ONIGERR_MEMORY);
+
+ if (pprev < p) {
+ snode = node_new_str(pprev, p);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ *ptail = node_new_list(snode, work);
+ CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+ }
+ else {
+ *ptail = work;
+ }
+ ptail = &(NCONS(work).right);
+ p += flen;
+ pprev = p;
+ }
+ else
+ p += enc_len(env->enc, *p);
+ }
+ *ptail = NULL_NODE;
+ if (IS_NOT_NULL(root)) {
+ if (pprev < sn->end) {
+ snode = node_new_str(pprev, sn->end);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ *ptail = node_new_list(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+ }
+ onig_node_free(*np);
+ *np = root;
+ }
+ }
+#endif
+ targetp = np;
+ goto repeat;
}
break;
case TK_RAW_BYTE:
tk_raw_byte:
- *np = node_new_str_raw_char((UChar )tok->u.c);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- while (1) {
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- if (r != TK_RAW_BYTE) goto repeat;
+ {
+ int expect_len;
- r = node_str_cat_char(*np, (UChar )tok->u.c);
- if (r < 0) return r;
+ *np = node_new_str_raw_char((UChar )tok->u.c);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ expect_len = enc_len(env->enc, tok->u.c);
+ len = 1;
+ while (1) {
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+ if (r != TK_RAW_BYTE) {
+#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+ if (len >= expect_len) {
+ NSTRING_CLEAR_RAW(*np);
+ }
+#endif
+ goto fold_entry;
+ }
+
+ r = node_str_cat_char(*np, (UChar )tok->u.c);
+ if (r < 0) return r;
+ len++;
+ }
}
break;
- case TK_WC:
+ case TK_CODE_POINT:
{
- UChar buf[WC2MB_MAX_BUFLEN];
- UChar* bufs = buf;
- UChar* bufe = bufs + WC2MB_MAX_BUFLEN;
- int num = wc2mb_buf(tok->u.wc, &bufs, &bufe, env->enc);
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
if (num < 0) return num;
- *np = node_new_str_raw(bufs, bufe);
- if (bufs != buf) xfree(bufs);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+ *np = node_new_str_raw(buf, buf + num);
+#else
+ *np = node_new_str(buf, buf + num);
+#endif
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
}
break;
case TK_QUOTE_OPEN:
{
- WCINT end_op[] = { (WCINT )'\\', (WCINT )'E' };
+ OnigCodePoint end_op[] = { (OnigCodePoint )MC_ESC, (OnigCodePoint )'E' };
UChar *qstart, *qend, *nextp;
qstart = *src;
@@ -3953,90 +4518,113 @@ parse_exp(Node** np, RegToken* tok, int term,
nextp = qend = end;
}
*np = node_new_str(qstart, qend);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
*src = nextp;
}
break;
case TK_CHAR_TYPE:
- switch (tok->u.subtype) {
- case CTYPE_WORD:
- case CTYPE_NOT_WORD:
- *np = node_new_ctype(tok->u.subtype);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- break;
-
- case CTYPE_WHITE_SPACE:
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c);
- }
- break;
-
- case CTYPE_NOT_WHITE_SPACE:
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_CODE_SPACE(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c);
- }
- break;
+ {
+ switch (tok->u.subtype) {
+ case CTYPE_WORD:
+ case CTYPE_NOT_WORD:
+ *np = node_new_ctype(tok->u.subtype);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ break;
- case CTYPE_DIGIT:
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c);
- }
- break;
+ case CTYPE_WHITE_SPACE:
+ case CTYPE_NOT_WHITE_SPACE:
+ case CTYPE_DIGIT:
+ case CTYPE_NOT_DIGIT:
+ {
+ CClassNode* cc;
+ int ctype, not;
+
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ cc = &(NCCLASS(*np));
+ add_ctype_to_cc(cc, ctype, 0, env);
+ if (not != 0) CCLASS_SET_NOT(cc);
+ }
+ break;
- case CTYPE_NOT_DIGIT:
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_CODE_DIGIT(env->enc, c)) BITSET_SET_BIT(NCCLASS(*np).bs, c);
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
}
- break;
-
- default:
- return REGERR_PARSER_BUG;
- break;
}
break;
+ case TK_CHAR_PROPERTY:
+ r = parse_char_property(np, tok, src, end, env);
+ if (r != 0) return r;
+ break;
+
case TK_CC_OPEN:
r = parse_char_class(np, tok, src, end, env);
if (r != 0) return r;
+
+#ifdef USE_FOLD_MATCH
+ if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
+ int res;
+ Node *alt_root, *work;
+ CClassNode* cc = &(NCCLASS(*np));
+
+ res = make_fold_alt_node_from_cc(env->enc, cc, &alt_root);
+ if (res < 0) return res;
+ if (res > 0) {
+ work = node_new_alt(*np, alt_root);
+ if (IS_NULL(work)) {
+ onig_node_free(alt_root);
+ return ONIGERR_MEMORY;
+ }
+ *np = work;
+ }
+ }
+#endif
break;
case TK_ANYCHAR:
*np = node_new_anychar();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ break;
+
+ case TK_ANYCHAR_ANYTIME:
+ *np = node_new_anychar();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
+ CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+ NQUALIFIER(qn).target = *np;
+ *np = qn;
break;
case TK_BACKREF:
len = tok->u.backref.num;
*np = node_new_backref(len,
- (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), env);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+ (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+ tok->u.backref.by_name, env);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
break;
#ifdef USE_SUBEXP_CALL
case TK_CALL:
*np = node_new_call(tok->u.call.name, tok->u.call.name_end);
- CHECK_NULL_RETURN_VAL(*np, REGERR_MEMORY);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
env->num_call++;
break;
#endif
case TK_ANCHOR:
- *np = regex_node_new_anchor(tok->u.anchor);
+ *np = onig_node_new_anchor(tok->u.anchor);
break;
case TK_OP_REPEAT:
case TK_INTERVAL:
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_CONTEXT_INDEP_OPS)) {
- if (IS_SYNTAX_BV(env->syntax, REG_SYN_CONTEXT_INVALID_OPS))
- return REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
else
*np = node_new_empty();
}
@@ -4047,50 +4635,58 @@ parse_exp(Node** np, RegToken* tok, int term,
break;
default:
- return REGERR_PARSER_BUG;
+ return ONIGERR_PARSER_BUG;
break;
}
- re_entry:
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
-
- repeat:
- if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
- if (is_invalid_qualifier_target(*np))
- return REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ {
+ targetp = np;
- qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
- (r == TK_INTERVAL ? 1 : 0));
- CHECK_NULL_RETURN_VAL(qn, REGERR_MEMORY);
- NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
- r = set_qualifier(qn, *np, group, env);
+ re_entry:
+ r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- if (tok->u.repeat.possessive != 0) {
- Node* en;
- en = node_new_effect(EFFECT_STOP_BACKTRACK);
- CHECK_NULL_RETURN_VAL(en, REGERR_MEMORY);
- NEFFECT(en).target = qn;
- qn = en;
- }
+ repeat:
+ if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+ if (is_invalid_qualifier_target(*targetp))
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
- if (r == 0) {
- *np = qn;
- }
- else if (r == 2) { /* split case: /abc+/ */
- Node* target = *np;
- *np = node_new_list(target, NULL);
- NCONS(*np).right = node_new_list(qn, NULL);
+ qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
+ (r == TK_INTERVAL ? 1 : 0));
+ CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+ NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
+ r = set_qualifier(qn, *targetp, group, env);
+ if (r < 0) return r;
+
+ if (tok->u.repeat.possessive != 0) {
+ Node* en;
+ en = node_new_effect(EFFECT_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
+ NEFFECT(en).target = qn;
+ qn = en;
+ }
+
+ if (r == 0) {
+ *targetp = qn;
+ }
+ else if (r == 2) { /* split case: /abc+/ */
+ Node *tmp;
+
+ *targetp = node_new_list(*targetp, NULL);
+ CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
+ tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
+ CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
+ targetp = &(NCONS(tmp).left);
+ }
+ goto re_entry;
}
- goto re_entry;
}
return r;
}
static int
-parse_branch(Node** top, RegToken* tok, int term,
+parse_branch(Node** top, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
{
int r;
@@ -4127,7 +4723,7 @@ parse_branch(Node** top, RegToken* tok, int term,
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
-parse_subexp(Node** top, RegToken* tok, int term,
+parse_subexp(Node** top, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
{
int r;
@@ -4135,7 +4731,10 @@ parse_subexp(Node** top, RegToken* tok, int term,
*top = NULL;
r = parse_branch(&node, tok, term, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
if (r == term) {
*top = node;
@@ -4159,9 +4758,9 @@ parse_subexp(Node** top, RegToken* tok, int term,
else {
err:
if (term == TK_SUBEXP_CLOSE)
- return REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+ return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
else
- return REGERR_PARSER_BUG;
+ return ONIGERR_PARSER_BUG;
}
return r;
@@ -4171,7 +4770,7 @@ static int
parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
{
int r;
- RegToken tok;
+ OnigToken tok;
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
@@ -4181,13 +4780,13 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
}
extern int
-regex_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
+onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
ScanEnv* env)
{
int r;
UChar* p;
-#ifdef USE_NAMED_SUBEXP
+#ifdef USE_NAMED_GROUP
names_clear(reg);
#endif
@@ -4207,7 +4806,7 @@ regex_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
}
extern void
-regex_scan_env_set_error_string(ScanEnv* env, int ecode,
+onig_scan_env_set_error_string(ScanEnv* env, int ecode,
UChar* arg, UChar* arg_end)
{
env->error = arg;
diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h
index 5a073623c9..b2726becbd 100644
--- a/ext/mbstring/oniguruma/regparse.h
+++ b/ext/mbstring/oniguruma/regparse.h
@@ -2,7 +2,7 @@
regparse.h - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGPARSE_H
@@ -64,6 +64,7 @@
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
@@ -72,6 +73,14 @@
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
+#define CCLASS_SET_NOT(cc) (cc)->not = 1
+
+#define NQ_TARGET_ISNOT_EMPTY 0
+#define NQ_TARGET_IS_EMPTY 1
+#define NQ_TARGET_IS_EMPTY_MEM 2
+#define NQ_TARGET_IS_EMPTY_REC 3
+
+
typedef struct {
UChar* s;
UChar* end;
@@ -92,23 +101,26 @@ typedef struct {
int upper;
int greedy;
int by_number; /* {n,m} */
- int target_may_empty; /* target can match with empty data */
+ int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
} QualifierNode;
/* status bits */
-#define NST_RECURSION (1<<0)
-#define NST_CALLED (1<<1)
-#define NST_ADDR_FIXED (1<<2)
-#define NST_MIN_FIXED (1<<3)
-#define NST_MAX_FIXED (1<<4)
-#define NST_CLEN_FIXED (1<<5)
-#define NST_MARK1 (1<<6)
-#define NST_MARK2 (1<<7)
-#define NST_MEM_BACKREFED (1<<8)
-#define NST_SIMPLE_REPEAT (1<<9) /* for stop backtrack optimization */
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
+
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@@ -122,20 +134,23 @@ typedef struct {
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
+#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
+#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
typedef struct {
int state;
int type;
int regnum;
- RegOptionType option;
+ OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
/* for multiple call reference */
- RegDistance min_len; /* min length (byte) */
- RegDistance max_len; /* max length (byte) */
+ OnigDistance min_len; /* min length (byte) */
+ OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EffectNode;
@@ -209,10 +224,12 @@ typedef struct _Node {
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
- RegOptionType option;
- RegCharEncoding enc;
- RegSyntaxType* syntax;
- BitStatusType backtrack_mem;
+ OnigOptionType option;
+ OnigEncoding enc;
+ OnigSyntaxType* syntax;
+ BitStatusType capture_history;
+ BitStatusType bt_mem_start;
+ BitStatusType bt_mem_end;
BitStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
@@ -224,6 +241,9 @@ typedef struct {
UnsetAddrList* unset_addr_list;
#endif
int num_mem;
+#ifdef USE_NAMED_GROUP
+ int num_named;
+#endif
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
@@ -234,21 +254,23 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
-
-extern void regex_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
-extern int regex_scan_unsigned_number P_((UChar** src, UChar* end, RegCharEncoding enc));
-extern void regex_node_conv_to_str_node P_((Node* node, int raw));
-extern int regex_node_str_cat P_((Node* node, UChar* s, UChar* end));
-extern void regex_node_free P_((Node* node));
-extern Node* regex_node_new_effect P_((int type));
-extern Node* regex_node_new_anchor P_((int type));
-extern int regex_free_node_list();
-extern int regex_names_free P_((regex_t* reg));
-extern int regex_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
-
-#ifdef REG_DEBUG
-#ifdef USE_NAMED_SUBEXP
-extern int regex_print_names(FILE*, regex_t*);
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
+extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
+extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
+extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
+extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
+extern void onig_node_free P_((Node* node));
+extern Node* onig_node_new_effect P_((int type));
+extern Node* onig_node_new_anchor P_((int type));
+extern int onig_free_node_list();
+extern int onig_names_free P_((regex_t* reg));
+extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
+
+#ifdef ONIG_DEBUG
+#ifdef USE_NAMED_GROUP
+extern int onig_print_names(FILE*, regex_t*);
#endif
#endif
diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c
index 007e7b65c0..533f813c0c 100644
--- a/ext/mbstring/oniguruma/regposerr.c
+++ b/ext/mbstring/oniguruma/regposerr.c
@@ -2,7 +2,7 @@
regposerr.c - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "config.h"
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
index ad22338132..3604ccfdbf 100644
--- a/ext/mbstring/oniguruma/regposix.c
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
@@ -17,7 +17,7 @@
#if 1
#define ENC_STRING_LEN(enc,s,len) do { \
UChar* tmps = (UChar* )(s); \
- /* while (*tmps != 0) tmps += mblen(enc,*tmps); */ \
+ /* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \
while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \
len = tmps - (UChar* )(s); \
} while(0)
@@ -34,57 +34,65 @@ static int
onig2posix_error_code(int code)
{
static O2PERR o2p[] = {
- { REG_MISMATCH, REG_NOMATCH },
- { REG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
- { REGERR_MEMORY, REG_ESPACE },
- { REGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
- { REGERR_TYPE_BUG, REG_EONIG_INTERNAL },
- { REGERR_PARSER_BUG, REG_EONIG_INTERNAL },
- { REGERR_STACK_BUG, REG_EONIG_INTERNAL },
- { REGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
- { REGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
- { REGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
- { REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
- { REGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
- { REGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
- { REGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
- { REGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
- { REGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
- { REGERR_END_PATTERN_AT_META, REG_EESCAPE },
- { REGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
- { REGERR_META_CODE_SYNTAX, REG_BADPAT },
- { REGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
- { REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
- { REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
- { REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
- { REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
- { REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
- { REGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
- { REGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
- { REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
- { REGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
- { REGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
- { REGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
- { REGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
- { REGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
- { REGERR_TOO_BIG_NUMBER, REG_BADPAT },
- { REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
- { REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
- { REGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
- { REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
- { REGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
- { REGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
- { REGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
- { REGERR_INVALID_BACKREF, REG_ESUBREG },
- { REGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_INVALID_SUBEXP_NAME, REG_BADPAT },
- { REGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
- { REGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
- { REGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
- { REGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
- { REGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
+ { ONIG_MISMATCH, REG_NOMATCH },
+ { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
+ { ONIGERR_MEMORY, REG_ESPACE },
+ { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
+ { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
+ { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
+ { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
+ { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
+ { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
+ { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
+ { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
+ { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
+ { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
+ { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
+ { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
+ { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
+ { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
+ { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
+ { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
+ { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
+ { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
+ { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
+ { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
+ { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
+ { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
+ { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
+ { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
+ { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
+ { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
+ { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
+ { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
+ { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
+ { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
+ { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
+ { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
+ { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
+ { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
+ { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
+ { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
+ { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
+ { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
+ { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
+ { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
+ { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
+ { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
+
};
int i;
@@ -103,26 +111,27 @@ extern int
regcomp(regex_t* reg, const char* pattern, int posix_options)
{
int r, len;
- RegSyntaxType* syntax = RegDefaultSyntax;
- RegOptionType options;
+ OnigSyntaxType* syntax = OnigDefaultSyntax;
+ OnigOptionType options;
if ((posix_options & REG_EXTENDED) == 0)
- syntax = REG_SYNTAX_POSIX_BASIC;
+ syntax = ONIG_SYNTAX_POSIX_BASIC;
options = syntax->options;
if ((posix_options & REG_ICASE) != 0)
- REG_OPTION_ON(options, REG_OPTION_IGNORECASE);
+ ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
if ((posix_options & REG_NEWLINE) != 0) {
- REG_OPTION_ON( options, REG_OPTION_NEGATE_SINGLELINE);
- REG_OPTION_OFF(options, REG_OPTION_SINGLELINE);
+ ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
+ ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
}
reg->comp_options = posix_options;
- ENC_STRING_LEN(RegDefaultCharEncoding, pattern, len);
- r = regex_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
- options, RegDefaultCharEncoding, syntax, (RegErrorInfo* )NULL);
- if (r != REG_NORMAL) {
+ ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
+ r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
+ options, OnigEncDefaultCharEncoding, syntax,
+ (OnigErrorInfo* )NULL);
+ if (r != ONIG_NORMAL) {
return onig2posix_error_code(r);
}
@@ -136,11 +145,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
{
int r, i, len;
UChar* end;
- RegOptionType options;
+ OnigOptionType options;
- options = REG_OPTION_POSIX_REGION;
- if ((posix_options & REG_NOTBOL) != 0) options |= REG_OPTION_NOTBOL;
- if ((posix_options & REG_NOTEOL) != 0) options |= REG_OPTION_NOTEOL;
+ options = ONIG_OPTION_POSIX_REGION;
+ if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
+ if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if ((reg->comp_options & REG_NOSUB) != 0) {
pmatch = (regmatch_t* )NULL;
@@ -149,16 +158,16 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->code,str,len);
end = (UChar* )(str + len);
- r = regex_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
- (RegRegion* )pmatch, options);
+ r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
+ (OnigRegion* )pmatch, options);
if (r >= 0) {
r = 0; /* Match */
}
- else if (r == REG_MISMATCH) {
+ else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
for (i = 0; i < nmatch; i++)
- pmatch[i].rm_so = pmatch[i].rm_eo = REG_REGION_NOTPOS;
+ pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
@@ -170,26 +179,74 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
extern void
regfree(regex_t* reg)
{
- regex_free(ONIG_C(reg));
+ onig_free(ONIG_C(reg));
}
extern void
reg_set_encoding(int mb_code)
{
- RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code];
+ OnigEncoding enc;
+
+ switch (mb_code) {
+ case REG_POSIX_ENCODING_ASCII:
+ enc = ONIG_ENCODING_ASCII;
+ break;
+ case REG_POSIX_ENCODING_EUC_JP:
+ enc = ONIG_ENCODING_EUC_JP;
+ break;
+ case REG_POSIX_ENCODING_SJIS:
+ enc = ONIG_ENCODING_SJIS;
+ break;
+ case REG_POSIX_ENCODING_UTF8:
+ enc = ONIG_ENCODING_UTF8;
+ break;
+ default:
+ return ;
+ break;
+ }
+
+ onigenc_set_default_encoding(enc);
}
extern int
reg_name_to_group_numbers(regex_t* reg,
unsigned char* name, unsigned char* name_end, int** nums)
{
- return regex_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
+ return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
+}
+
+typedef struct {
+ int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*);
+ regex_t* reg;
+ void* arg;
+} i_wrap;
+
+static int i_wrapper(unsigned char* name, unsigned char* name_end,
+ int ng, int* gs,
+ onig_regex_t* reg, void* arg)
+{
+ i_wrap* warg = (i_wrap* )arg;
+
+ return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
+}
+
+extern int
+reg_foreach_name(regex_t* reg,
+ int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ i_wrap warg;
+
+ warg.func = func;
+ warg.reg = reg;
+ warg.arg = arg;
+
+ return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
}
extern int
-reg_foreach_name(regex_t* reg, int (*func)(unsigned char*,int,int*,void*),
- void* arg)
+reg_number_of_names(regex_t* reg)
{
- return regex_foreach_name(ONIG_C(reg), func, arg);
+ return onig_number_of_names(ONIG_C(reg));
}
diff --git a/ext/mbstring/oniguruma/sample/names.c b/ext/mbstring/oniguruma/sample/names.c
deleted file mode 100644
index 1ebc4e856c..0000000000
--- a/ext/mbstring/oniguruma/sample/names.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * names.c -- example of group name callback.
- */
-#include<stdio.h>
-#include "oniguruma.h"
-
-static int
-name_callback(UChar* name, int ngroup_num, int* group_nums, void* arg)
-{
- int i, gn;
- RegRegion *region = (RegRegion* )arg;
-
- for (i = 0; i < ngroup_num; i++) {
- gn = group_nums[i];
- fprintf(stderr, "%s (%d): ", name, gn);
- fprintf(stderr, "(%d-%d)\n", region->beg[gn], region->end[gn]);
- }
- return 0; /* 0: continue */
-}
-
-extern int main(int argc, char* argv[])
-{
- int r;
- unsigned char *start, *range, *end;
- regex_t* reg;
- RegErrorInfo einfo;
- RegRegion *region;
-
- static unsigned char* pattern = "(?<foo>a*)(?<bar>b*)(?<foo>c*)";
- static unsigned char* str = "aaabbbbcc";
-
- r = regex_new(&reg, pattern, pattern + strlen(pattern),
- REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo);
- if (r != REG_NORMAL) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r, &einfo);
- fprintf(stderr, "ERROR: %s\n", s);
- exit(-1);
- }
-
- region = regex_region_new();
-
- end = str + strlen(str);
- start = str;
- range = end;
- r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE);
- if (r >= 0) {
- fprintf(stderr, "match at %d\n\n", r);
- r = regex_foreach_name(reg, name_callback, (void* )region);
- }
- else if (r == REG_MISMATCH) {
- fprintf(stderr, "search fail\n");
- }
- else { /* error */
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r);
- exit(-1);
- }
-
- regex_region_free(region, 1 /* 1:free self, 0:free contents only */);
- regex_free(reg);
- regex_end();
- return 0;
-}
diff --git a/ext/mbstring/oniguruma/sample/posix.c b/ext/mbstring/oniguruma/sample/posix.c
deleted file mode 100644
index ff20292cb0..0000000000
--- a/ext/mbstring/oniguruma/sample/posix.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * posix.c
- */
-#include<stdio.h>
-#include "onigposix.h"
-
-static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
-{
- int r, i;
- char buf[200];
- regmatch_t pmatch[20];
-
- r = regexec(reg, str, reg->re_nsub + 1, pmatch, 0);
- if (r != 0 && r != REG_NOMATCH) {
- regerror(r, reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
-
- if (r == REG_NOMATCH) {
- fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str);
- }
- else {
- fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str);
- for (i = 0; i <= reg->re_nsub; i++) {
- fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo);
- }
- }
- return 0;
-}
-
-extern int main(int argc, char* argv[])
-{
- int r;
- char buf[200];
- regex_t reg;
- unsigned char* pattern;
-
- /* default syntax (REG_SYNTAX_RUBY) */
- pattern = "^a+b{2,7}[c-f]?$|uuu";
- r = regcomp(&reg, pattern, REG_EXTENDED);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
- x(&reg, pattern, "aaabbbbd");
-
- /* POSIX Basic RE (REG_EXTENDED is not specified.) */
- pattern = "^a+b{2,7}[c-f]?|uuu";
- r = regcomp(&reg, pattern, 0);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
- x(&reg, pattern, "a+b{2,7}d?|uuu");
-
- /* POSIX Basic RE (REG_EXTENDED is not specified.) */
- pattern = "^a*b\\{2,7\\}\\([c-f]\\)$";
- r = regcomp(&reg, pattern, 0);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
- x(&reg, pattern, "aaaabbbbbbd");
-
- /* POSIX Extended RE */
- regex_set_default_syntax(REG_SYNTAX_POSIX_EXTENDED);
- pattern = "^a+b{2,7}[c-f]?)$|uuu";
- r = regcomp(&reg, pattern, REG_EXTENDED);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
- x(&reg, pattern, "aaabbbbd)");
-
- pattern = "^b.";
- r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
- }
- x(&reg, pattern, "a\nb\n");
-
- regfree(&reg);
- regex_end();
- return 0;
-}
diff --git a/ext/mbstring/oniguruma/sample/simple.c b/ext/mbstring/oniguruma/sample/simple.c
deleted file mode 100644
index 89498bac11..0000000000
--- a/ext/mbstring/oniguruma/sample/simple.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * simple.c
- */
-#include<stdio.h>
-#include "oniguruma.h"
-
-extern int main(int argc, char* argv[])
-{
- int r;
- unsigned char *start, *range, *end;
- regex_t* reg;
- RegErrorInfo einfo;
- RegRegion *region;
-
- static unsigned char* pattern = "a(.*)b|[e-f]+";
- static unsigned char* str = "zzzzaffffffffb";
-
- r = regex_new(&reg, pattern, pattern + strlen(pattern),
- REG_OPTION_DEFAULT, REGCODE_ASCII, REG_SYNTAX_DEFAULT, &einfo);
- if (r != REG_NORMAL) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r, &einfo);
- fprintf(stderr, "ERROR: %s\n", s);
- exit(-1);
- }
-
- region = regex_region_new();
-
- end = str + strlen(str);
- start = str;
- range = end;
- r = regex_search(reg, str, end, start, range, region, REG_OPTION_NONE);
- if (r >= 0) {
- int i;
-
- fprintf(stderr, "match at %d\n", r);
- for (i = 0; i < region->num_regs; i++) {
- fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
- }
- }
- else if (r == REG_MISMATCH) {
- fprintf(stderr, "search fail\n");
- }
- else { /* error */
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r);
- exit(-1);
- }
-
- regex_region_free(region, 1 /* 1:free self, 0:free contents only */);
- regex_free(reg);
- regex_end();
- return 0;
-}
diff --git a/ext/mbstring/oniguruma/test.rb b/ext/mbstring/oniguruma/test.rb
deleted file mode 100644
index 2c69344407..0000000000
--- a/ext/mbstring/oniguruma/test.rb
+++ /dev/null
@@ -1,971 +0,0 @@
-# test.rb
-# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
-
-def pr(result, reg, str, n = 0, *range)
- printf("%s /%s/:'%s'", result, reg.source, str)
- if (n.class == Fixnum)
- printf(":%d", n) if n != 0
- if (range.size > 0)
- if (range[3].nil?)
- printf(" (%d-%d : X-X)", range[0], range[1])
- else
- printf(" (%d-%d : %d-%d)", range[0], range[1], range[2], range[3])
- end
- end
- else
- printf(" %s", n)
- end
- printf("\n")
-end
-
-def rok(result_opt, reg, str, n = 0, *range)
- result = "OK" + result_opt
- result += " " * (7 - result.length)
- pr(result, reg, str, n, *range)
- $rok += 1
-end
-
-def rfail(result_opt, reg, str, n = 0, *range)
- result = "FAIL" + result_opt
- result += " " * (7 - result.length)
- pr(result, reg, str, n, *range)
- $rfail += 1
-end
-
-def x(reg, str, s, e, n = 0)
- m = reg.match(str)
- if m
- if (m.size() <= n)
- rfail("(%d)" % (m.size()-1), reg, str, n)
- else
- if (m.begin(n) == s && m.end(n) == e)
- rok("", reg, str, n)
- else
- rfail("", reg, str, n, s, e, m.begin(n), m.end(n))
- end
- end
- else
- rfail("", reg, str, n)
- end
-end
-
-def n(reg, str)
- m = reg.match(str)
- if m
- rfail("(N)", reg, str, 0)
- else
- rok("(N)", reg, str, 0)
- end
-end
-
-def r(reg, str, index, pos = nil)
- if (pos)
- res = str.rindex(reg, pos)
- else
- res = str.rindex(reg)
- end
- if res
- if (res == index)
- rok("(r)", reg, str)
- else
- rfail("(r)", reg, str, [res, '-', index])
- end
- else
- rfail("(r)", reg, str)
- end
-end
-
-def i(reg, str, s = 0, e = 0, n = 0)
- # ignore
-end
-
-### main ###
-$rok = $rfail = 0
-
-
-def test_sb(enc)
-$KCODE = enc
-
-
-x(//, '', 0, 0)
-x(/^/, '', 0, 0)
-x(/$/, '', 0, 0)
-x(/\G/, '', 0, 0)
-x(/\A/, '', 0, 0)
-x(/\Z/, '', 0, 0)
-x(/\z/, '', 0, 0)
-x(/^$/, '', 0, 0)
-x(/\ca/, "\001", 0, 1)
-x(/\C-b/, "\002", 0, 1)
-x(/\M-Z/, "\xDA", 0, 1)
-x(//, 'a', 0, 0)
-x(/a/, 'a', 0, 1)
-x(/aa/, 'aa', 0, 2)
-x(/aaa/, 'aaa', 0, 3)
-x(/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 0, 35)
-x(/ab/, 'ab', 0, 2)
-x(/b/, 'ab', 1, 2)
-x(/bc/, 'abc', 1, 3)
-x(/\17/, "\017", 0, 1)
-x(/\x1f/, "\x1f", 0, 1)
-x(/\xFE/, "\xfe", 0, 1)
-x(/a(?#....\\JJJJ)b/, 'ab', 0, 2)
-x(/./, 'a', 0, 1)
-n(/./, '')
-x(/../, 'ab', 0, 2)
-x(/\w/, 'e', 0, 1)
-n(/\W/, 'e')
-x(/\s/, ' ', 0, 1)
-x(/\S/, 'b', 0, 1)
-x(/\d/, '4', 0, 1)
-n(/\D/, '4')
-x(/\b/, 'z ', 0, 0)
-x(/\b/, ' z', 1, 1)
-x(/\B/, 'zz ', 1, 1)
-x(/\B/, 'z ', 2, 2)
-x(/\B/, ' z', 0, 0)
-x(/[ab]/, 'b', 0, 1)
-n(/[ab]/, 'c')
-x(/[a-z]/, 't', 0, 1)
-n(/[^a]/, 'a')
-x(/[^a]/, "\n", 0, 1)
-x(/[]]/, ']', 0, 1)
-n(/[^]]/, ']')
-x(/[b-]/, 'b', 0, 1)
-x(/[b-]/, '-', 0, 1)
-x(/[\w]/, 'z', 0, 1)
-n(/[\w]/, ' ')
-x(/[\d]/, '5', 0, 1)
-n(/[\d]/, 'e')
-x(/[\D]/, 't', 0, 1)
-n(/[\D]/, '3')
-x(/[\s]/, ' ', 0, 1)
-n(/[\s]/, 'a')
-x(/[\S]/, 'b', 0, 1)
-n(/[\S]/, ' ')
-x(/[\w\d]/, '2', 0, 1)
-n(/[\w\d]/, ' ')
-x(/[[:upper:]]/, 'B', 0, 1)
-x(/[*[:xdigit:]+]/, '+', 0, 1)
-x(/[*[:xdigit:]+]/, 'GHIKK-9+*', 6, 7)
-x(/[*[:xdigit:]+]/, '-@^+', 3, 4)
-n(/[[:upper]]/, 'A')
-x(/[[:upper]]/, ':', 0, 1)
-x(/[\044-\047]/, "\046", 0, 1)
-x(/[\x5a-\x5c]/, "\x5b", 0, 1)
-x(/[\x6A-\x6D]/, "\x6c", 0, 1)
-n(/[\x6A-\x6D]/, "\x6E")
-n(/^[0-9A-F]+ 0+ UNDEF /, '75F 00000000 SECT14A notype () External | _rb_apply')
-x(/[\[]/, '[', 0, 1)
-x(/[\]]/, ']', 0, 1)
-x(/[&]/, '&', 0, 1)
-x(/[[ab]]/, 'b', 0, 1)
-x(/[[ab]c]/, 'c', 0, 1)
-n(/[[^a]]/, 'a')
-n(/[^[a]]/, 'a')
-x(/[[ab]&&bc]/, 'b', 0, 1)
-n(/[[ab]&&bc]/, 'a')
-n(/[[ab]&&bc]/, 'c')
-x(/[a-z&&b-y&&c-x]/, 'w', 0, 1)
-n(/[^a-z&&b-y&&c-x]/, 'w')
-x(/[[^a&&a]&&a-z]/, 'b', 0, 1)
-n(/[[^a&&a]&&a-z]/, 'a')
-x(/[[^a-z&&bcdef]&&[^c-g]]/, 'h', 0, 1)
-n(/[[^a-z&&bcdef]&&[^c-g]]/, 'c')
-x(/[^[^abc]&&[^cde]]/, 'c', 0, 1)
-x(/[^[^abc]&&[^cde]]/, 'e', 0, 1)
-n(/[^[^abc]&&[^cde]]/, 'f')
-x(/[a-&&-a]/, '-', 0, 1)
-n(/[a-&&-a]/, '&')
-n(/\wabc/, ' abc')
-x(/a\Wbc/, 'a bc', 0, 4)
-x(/a.b.c/, 'aabbc', 0, 5)
-x(/.\wb\W..c/, 'abb bcc', 0, 7)
-x(/\s\wzzz/, ' zzzz', 0, 5)
-x(/aa.b/, 'aabb', 0, 4)
-n(/.a/, 'ab')
-x(/.a/, 'aa', 0, 2)
-x(/^a/, 'a', 0, 1)
-x(/^a$/, 'a', 0, 1)
-x(/^\w$/, 'a', 0, 1)
-n(/^\w$/, ' ')
-x(/^\wab$/, 'zab', 0, 3)
-x(/^\wabcdef$/, 'zabcdef', 0, 7)
-x(/^\w...def$/, 'zabcdef', 0, 7)
-x(/\w\w\s\Waaa\d/, 'aa aaa4', 0, 8)
-x(/\A\Z/, '', 0, 0)
-x(/\Axyz/, 'xyz', 0, 3)
-x(/xyz\Z/, 'xyz', 0, 3)
-x(/xyz\z/, 'xyz', 0, 3)
-x(/\Gaz/, 'az', 0, 2)
-n(/\Gz/, 'bza')
-n(/az\G/, 'az')
-n(/az\A/, 'az')
-n(/a\Az/, 'az')
-x(/\^\$/, '^$', 0, 2)
-x(/\w/, '_', 0, 1)
-n(/\W/, '_')
-x(/(?=z)z/, 'z', 0, 1)
-n(/(?=z)./, 'a')
-x(/(?!z)a/, 'a', 0, 1)
-n(/(?!z)a/, 'z')
-x(/(?i:a)/, 'a', 0, 1)
-x(/(?i:a)/, 'A', 0, 1)
-x(/(?i:A)/, 'a', 0, 1)
-n(/(?i:A)/, 'b')
-x(/(?i:[A-Z])/, 'a', 0, 1)
-x(/(?i:[f-m])/, 'H', 0, 1)
-x(/(?i:[f-m])/, 'h', 0, 1)
-n(/(?i:[f-m])/, 'e')
-n(/(?i:[A-c])/, 'D') # changed spec. 2003/02/07
-n(/(?i:[a-C])/, 'D') # changed spec. 2003/02/07
-n(/(?i:[b-C])/, 'A')
-x(/(?i:[a-C])/, 'B', 0, 1)
-n(/(?i:[c-X])/, '[')
-n(/(?i:[!-k])/, 'Z')
-x(/(?i:[!-k])/, '7', 0, 1)
-n(/(?i:[T-}])/, 'b')
-x(/(?i:[T-}])/, '{', 0, 1)
-x(/(?i:\?a)/, '?A', 0, 2)
-x(/(?i:\*A)/, '*a', 0, 2)
-n(/./, "\n")
-x(/(?m:.)/, "\n", 0, 1)
-x(/(?m:a.)/, "a\n", 0, 2)
-x(/(?m:.b)/, "a\nb", 1, 3)
-x(/a?/, '', 0, 0)
-x(/a?/, 'b', 0, 0)
-x(/a?/, 'a', 0, 1)
-x(/a*/, '', 0, 0)
-x(/a*/, 'a', 0, 1)
-x(/a*/, 'aaa', 0, 3)
-x(/a*/, 'baaaa', 0, 0)
-n(/a+/, '')
-x(/a+/, 'a', 0, 1)
-x(/a+/, 'aaaa', 0, 4)
-x(/a+/, 'aabbb', 0, 2)
-x(/a+/, 'baaaa', 1, 5)
-x(/.?/, '', 0, 0)
-x(/.?/, 'f', 0, 1)
-x(/.?/, "\n", 0, 0)
-x(/.*/, '', 0, 0)
-x(/.*/, 'abcde', 0, 5)
-x(/.+/, 'z', 0, 1)
-x(/.+/, "zdswer\n", 0, 6)
-x(/a|b/, 'a', 0, 1)
-x(/a|b/, 'b', 0, 1)
-x(/|a/, 'a', 0, 0)
-x(/(|a)/, 'a', 0, 0)
-x(/ab|bc/, 'ab', 0, 2)
-x(/ab|bc/, 'bc', 0, 2)
-x(/z(?:ab|bc)/, 'zbc', 0, 3)
-x(/a(?:ab|bc)c/, 'aabc', 0, 4)
-x(/ab|(?:ac|az)/, 'az', 0, 2)
-x(/a|b|c/, 'dc', 1, 2)
-x(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'pqr', 0, 2)
-n(/a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz/, 'mn')
-x(/a|^z/, 'ba', 1, 2)
-x(/a|^z/, 'za', 0, 1)
-x(/a|\Gz/, 'bza', 2, 3)
-x(/a|\Gz/, 'za', 0, 1)
-x(/a|\Az/, 'bza', 2, 3)
-x(/a|\Az/, 'za', 0, 1)
-x(/a|b\Z/, 'ba', 1, 2)
-x(/a|b\Z/, 'b', 0, 1)
-x(/a|b\z/, 'ba', 1, 2)
-x(/a|b\z/, 'b', 0, 1)
-x(/\w|\s/, ' ', 0, 1)
-n(/\w|\w/, ' ')
-x(/\w|%/, '%', 0, 1)
-x(/\w|[&$]/, '&', 0, 1)
-x(/[b-d]|[^e-z]/, 'a', 0, 1)
-x(/(?:a|[c-f])|bz/, 'dz', 0, 1)
-x(/(?:a|[c-f])|bz/, 'bz', 0, 2)
-x(/abc|(?=zz)..f/, 'zzf', 0, 3)
-x(/abc|(?!zz)..f/, 'abf', 0, 3)
-x(/(?=za)..a|(?=zz)..a/, 'zza', 0, 3)
-n(/(?>a|abd)c/, 'abdc')
-x(/(?>abd|a)c/, 'abdc', 0, 4)
-x(/a?|b/, 'a', 0, 1)
-x(/a?|b/, 'b', 0, 0)
-x(/a?|b/, '', 0, 0)
-x(/a*|b/, 'aa', 0, 2)
-x(/a*|b*/, 'ba', 0, 0)
-x(/a*|b*/, 'ab', 0, 1)
-x(/a+|b*/, '', 0, 0)
-x(/a+|b*/, 'bbb', 0, 3)
-x(/a+|b*/, 'abbb', 0, 1)
-n(/a+|b+/, '')
-x(/(a|b)?/, 'b', 0, 1)
-x(/(a|b)*/, 'ba', 0, 2)
-x(/(a|b)+/, 'bab', 0, 3)
-x(/(ab|ca)+/, 'caabbc', 0, 4)
-x(/(ab|ca)+/, 'aabca', 1, 5)
-x(/(ab|ca)+/, 'abzca', 0, 2)
-x(/(a|bab)+/, 'ababa', 0, 5)
-x(/(a|bab)+/, 'ba', 1, 2)
-x(/(a|bab)+/, 'baaaba', 1, 4)
-x(/(?:a|b)(?:a|b)/, 'ab', 0, 2)
-x(/(?:a*|b*)(?:a*|b*)/, 'aaabbb', 0, 3)
-x(/(?:a*|b*)(?:a+|b+)/, 'aaabbb', 0, 6)
-x(/(?:a+|b+){2}/, 'aaabbb', 0, 6)
-x(/h{0,}/, 'hhhh', 0, 4)
-x(/(?:a+|b+){1,2}/, 'aaabbb', 0, 6)
-x(/(?:a+|\Ab*)cc/, 'cc', 0, 2)
-n(/(?:a+|\Ab*)cc/, 'abcc')
-x(/(?:^a+|b+)*c/, 'aabbbabc', 6, 8)
-x(/(?:^a+|b+)*c/, 'aabbbbc', 0, 7)
-x(/a|(?i)c/, 'C', 0, 1)
-x(/(?i)c|a/, 'C', 0, 1)
-i(/(?i)c|a/, 'A', 0, 1) # different spec.
-x(/(?i:c)|a/, 'C', 0, 1)
-n(/(?i:c)|a/, 'A')
-x(/[abc]?/, 'abc', 0, 1)
-x(/[abc]*/, 'abc', 0, 3)
-x(/[^abc]*/, 'abc', 0, 0)
-n(/[^abc]+/, 'abc')
-x(/a??/, 'aaa', 0, 0)
-x(/ba??b/, 'bab', 0, 3)
-x(/a*?/, 'aaa', 0, 0)
-x(/ba*?/, 'baa', 0, 1)
-x(/ba*?b/, 'baab', 0, 4)
-x(/a+?/, 'aaa', 0, 1)
-x(/ba+?/, 'baa', 0, 2)
-x(/ba+?b/, 'baab', 0, 4)
-x(/(?:a?)??/, 'a', 0, 0)
-x(/(?:a??)?/, 'a', 0, 0)
-x(/(?:a?)+?/, 'aaa', 0, 1)
-x(/(?:a+)??/, 'aaa', 0, 0)
-x(/(?:a+)??b/, 'aaab', 0, 4)
-i(/(?:ab)?{2}/, '', 0, 0) # GNU regex bug
-x(/(?:ab)?{2}/, 'ababa', 0, 4)
-x(/(?:ab)*{0}/, 'ababa', 0, 0)
-x(/(?:ab){3,}/, 'abababab', 0, 8)
-n(/(?:ab){3,}/, 'abab')
-x(/(?:ab){2,4}/, 'ababab', 0, 6)
-x(/(?:ab){2,4}/, 'ababababab', 0, 8)
-x(/(?:ab){2,4}?/, 'ababababab', 0, 4)
-x(/(?:ab){,}/, 'ab{,}', 0, 5)
-x(/(?:abc)+?{2}/, 'abcabcabc', 0, 6)
-x(/(?:X*)(?i:xa)/, 'XXXa', 0, 4)
-x(/(d+)([^abc]z)/, 'dddz', 0, 4)
-x(/([^abc]*)([^abc]z)/, 'dddz', 0, 4)
-x(/(\w+)(\wz)/, 'dddz', 0, 4)
-x(/(a)/, 'a', 0, 1, 1)
-x(/(ab)/, 'ab', 0, 2, 1)
-x(/((ab))/, 'ab', 0, 2)
-x(/((ab))/, 'ab', 0, 2, 1)
-x(/((ab))/, 'ab', 0, 2, 2)
-x(/((((((((((((((((((((ab))))))))))))))))))))/, 'ab', 0, 2, 20)
-x(/(ab)(cd)/, 'abcd', 0, 2, 1)
-x(/(ab)(cd)/, 'abcd', 2, 4, 2)
-x(/()(a)bc(def)ghijk/, 'abcdefghijk', 3, 6, 3)
-x(/(()(a)bc(def)ghijk)/, 'abcdefghijk', 3, 6, 4)
-x(/(^a)/, 'a', 0, 1)
-x(/(a)|(a)/, 'ba', 1, 2, 1)
-x(/(^a)|(a)/, 'ba', 1, 2, 2)
-x(/(a?)/, 'aaa', 0, 1, 1)
-x(/(a*)/, 'aaa', 0, 3, 1)
-x(/(a*)/, '', 0, 0, 1)
-x(/(a+)/, 'aaaaaaa', 0, 7, 1)
-x(/(a+|b*)/, 'bbbaa', 0, 3, 1)
-x(/(a+|b?)/, 'bbbaa', 0, 1, 1)
-x(/(abc)?/, 'abc', 0, 3, 1)
-x(/(abc)*/, 'abc', 0, 3, 1)
-x(/(abc)+/, 'abc', 0, 3, 1)
-x(/(xyz|abc)+/, 'abc', 0, 3, 1)
-x(/([xyz][abc]|abc)+/, 'abc', 0, 3, 1)
-x(/((?i:abc))/, 'AbC', 0, 3, 1)
-x(/(abc)(?i:\1)/, 'abcABC', 0, 6)
-x(/((?m:a.c))/, "a\nc", 0, 3, 1)
-x(/((?=az)a)/, 'azb', 0, 1, 1)
-x(/abc|(.abd)/, 'zabd', 0, 4, 1)
-x(/(?:abc)|(ABC)/, 'abc', 0, 3)
-x(/(?i:(abc))|(zzz)/, 'ABC', 0, 3, 1)
-x(/a*(.)/, 'aaaaz', 4, 5, 1)
-x(/a*?(.)/, 'aaaaz', 0, 1, 1)
-x(/a*?(c)/, 'aaaac', 4, 5, 1)
-x(/[bcd]a*(.)/, 'caaaaz', 5, 6, 1)
-x(/(\Abb)cc/, 'bbcc', 0, 2, 1)
-n(/(\Abb)cc/, 'zbbcc')
-x(/(^bb)cc/, 'bbcc', 0, 2, 1)
-n(/(^bb)cc/, 'zbbcc')
-x(/cc(bb$)/, 'ccbb', 2, 4, 1)
-n(/cc(bb$)/, 'ccbbb')
-#n(/\1/, 'a') # compile error on Oniguruma
-n(/(\1)/, '')
-n(/\1(a)/, 'aa')
-n(/(a(b)\1)\2+/, 'ababb')
-n(/(?:(?:\1|z)(a))+$/, 'zaa')
-x(/(?:(?:\1|z)(a))+$/, 'zaaa', 0, 4)
-x(/(a)(?=\1)/, 'aa', 0, 1)
-n(/(a)$|\1/, 'az')
-x(/(a)\1/, 'aa', 0, 2)
-n(/(a)\1/, 'ab')
-x(/(a?)\1/, 'aa', 0, 2)
-x(/(a??)\1/, 'aa', 0, 0)
-x(/(a*)\1/, 'aaaaa', 0, 4)
-x(/(a*)\1/, 'aaaaa', 0, 2, 1)
-x(/a(b*)\1/, 'abbbb', 0, 5)
-x(/a(b*)\1/, 'ab', 0, 1)
-x(/(a*)(b*)\1\2/, 'aaabbaaabb', 0, 10)
-x(/(a*)(b*)\2/, 'aaabbbb', 0, 7)
-x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 8)
-x(/(((((((a*)b))))))c\7/, 'aaabcaaa', 0, 3, 7)
-x(/(a)(b)(c)\2\1\3/, 'abcbac', 0, 6)
-x(/([a-d])\1/, 'cc', 0, 2)
-x(/(\w\d\s)\1/, 'f5 f5 ', 0, 6)
-n(/(\w\d\s)\1/, 'f5 f5')
-x(/(who|[a-c]{3})\1/, 'whowho', 0, 6)
-x(/...(who|[a-c]{3})\1/, 'abcwhowho', 0, 9)
-x(/(who|[a-c]{3})\1/, 'cbccbc', 0, 6)
-x(/(^a)\1/, 'aa', 0, 2)
-n(/(^a)\1/, 'baa')
-n(/(a$)\1/, 'aa')
-n(/(ab\Z)\1/, 'ab')
-x(/(a*\Z)\1/, 'a', 1, 1)
-x(/.(a*\Z)\1/, 'ba', 1, 2)
-x(/(.(abc)\2)/, 'zabcabc', 0, 7, 1)
-x(/(.(..\d.)\2)/, 'z12341234', 0, 9, 1)
-x(/((?i:az))\1/, 'AzAz', 0, 4)
-n(/((?i:az))\1/, 'Azaz')
-x(/(?<=a)b/, 'ab', 1, 2)
-n(/(?<=a)b/, 'bb')
-x(/(?<=a|b)b/, 'bb', 1, 2)
-x(/(?<=a|bc)b/, 'bcb', 2, 3)
-x(/(?<=a|bc)b/, 'ab', 1, 2)
-x(/(?<=a|bc||defghij|klmnopq|r)z/, 'rz', 1, 2)
-x(/(?<!a)b/, 'cb', 1, 2)
-n(/(?<!a)b/, 'ab')
-x(/(?<!a|bc)b/, 'bbb', 0, 1)
-n(/(?<!a|bc)z/, 'bcz')
-x(/(?<name1>a)/, 'a', 0, 1)
-x(/(?<name-2>ab)\1/, 'abab', 0, 4)
-x(/(?<name-3>.zv.)\k<name-3>/, 'azvbazvb', 0, 8)
-x(/(?<=\g<ab>)|-\zEND (?<ab>XyZ)/, 'XyZ', 3, 3)
-x(/(?<n>|a\g<n>)+/, '', 0, 0)
-x(/(?<n>|\(\g<n>\))+$/, '()(())', 0, 6)
-x(/\g<n>(?<n>.){0}/, 'X', 0, 1, 1)
-x(/\g<n>(abc|df(?<n>.YZ){2,8}){0}/, 'XYZ', 0, 3)
-x(/\A(?<n>(a\g<n>)|)\z/, 'aaaa', 0, 4)
-x(/(?<n>|\g<m>\g<n>)\z|\zEND (?<m>a|(b)\g<m>)/, 'bbbbabba', 0, 8)
-x(/(?<@:name[1240]>\w+\sx)a+\k<@:name[1240]>/, ' fg xaaaaaaaafg x', 2, 18)
-x(/(z)()()(?<9>a)\4/, 'zaa', 1, 2, 4)
-x(/(.)(((?<*>a)))\k<*>/, 'zaa', 0, 3)
-x(/((?<name1>\d)|(?<name2>\w))(\k<name1>|\k<name2>)/, 'ff', 0, 2)
-x(/(?:(?<x>)|(?<x>efg))\k<x>/, '', 0, 0)
-x(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefgefg', 3, 9)
-n(/(?:(?<@x>abc)|(?<@x>efg))\k<@x>/, 'abcefg')
-x(/(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\k<n1>$/, 'a-pyumpyum', 2, 10)
-x(/(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\k<n1>$/, 'xxxxabcdefghijklmnabcdefghijklmn', 4, 18, 14)
-x(/(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$/, 'aaa', 0, 3, 16)
-x(/(?<foo>a|\(\g<foo>\))/, 'a', 0, 1)
-x(/(?<foo>a|\(\g<foo>\))/, '((((((a))))))', 0, 13)
-x(/(?<foo>a|\(\g<foo>\))/, '((((((((a))))))))', 0, 17, 1)
-x(/\g<bar>|\zEND(?<bar>.*abc$)/, 'abcxxxabc', 0, 9)
-x(/\g<1>|\zEND(.a.)/, 'bac', 0, 3)
-x(/\g<2>\g<1>|\zEND(.a.)(?<?>.b.)/, 'xbxyay', 3, 6, 1)
-x(/\A(?:\g<pon>|\g<pan>|\zEND (?<pan>a|c\g<pon>c)(?<pon>b|d\g<pan>d))$/, 'cdcbcdc', 0, 7)
-x(/\A(?<n>|a\g<m>)\z|\zEND (?<m>\g<n>)/, 'aaaa', 0, 4)
-x(/(?<n>(a|b\g<n>c){3,5})/, 'baaaaca', 1, 5)
-x(/(?<n>(a|b\g<n>c){3,5})/, 'baaaacaaaaa', 0, 10)
-
-r(//, '', 0)
-r(/a/, 'a', 0)
-r(/a/, 'a', 0, 1)
-r(/b/, 'abc', 1)
-r(/b/, 'abc', 1, 2)
-r(/./, 'a', 0)
-r(/.*/, 'abcde fgh', 9)
-r(/a*/, 'aaabbc', 6)
-r(/a+/, 'aaabbc', 2)
-r(/a?/, 'bac', 3)
-r(/a??/, 'bac', 3)
-r(/abcde/, 'abcdeavcd', 0)
-r(/\w\d\s/, ' a2 aa $3 ', 2)
-r(/[c-f]aa[x-z]/, '3caaycaaa', 1)
-r(/(?i:fG)g/, 'fGgFggFgG', 3)
-r(/a|b/, 'b', 0)
-r(/ab|bc|cd/, 'bcc', 0)
-r(/(ffy)\1/, 'ffyffyffy', 3)
-r(/|z/, 'z', 1)
-r(/^az/, 'azaz', 0)
-r(/az$/, 'azaz', 2)
-r(/(((.a)))\3/, 'zazaaa', 0)
-r(/(ac*?z)\1/, 'aacczacczacz', 1)
-r(/aaz{3,4}/, 'bbaabbaazzzaazz', 6)
-r(/\000a/, "b\000a", 1)
-r(/ff\xfe/, "fff\xfe", 1)
-r(/...abcdefghijklmnopqrstuvwxyz/, 'zzzzzabcdefghijklmnopqrstuvwxyz', 2)
-end
-
-def test_euc(enc)
-$KCODE = enc
-
-x(//, 'あ', 0, 0)
-x(/あ/, 'あ', 0, 2)
-n(/い/, 'あ')
-x(/うう/, 'うう', 0, 4)
-x(/あいう/, 'あいう', 0, 6)
-x(/こここここここここここここここここここここここここここここここここここ/, 'こここここここここここここここここここここここここここここここここここ', 0, 70)
-x(/あ/, 'いあ', 2, 4)
-x(/いう/, 'あいう', 2, 6)
-x(/\xca\xb8/, "\xca\xb8", 0, 2)
-x(/./, 'あ', 0, 2)
-x(/../, 'かき', 0, 4)
-x(/\w/, 'お', 0, 2)
-n(/\W/, 'あ')
-x(/\S/, 'そ', 0, 2)
-x(/\S/, '漢', 0, 2)
-x(/\b/, '気 ', 0, 0)
-x(/\b/, ' ほ', 1, 1)
-x(/\B/, 'せそ ', 2, 2)
-x(/\B/, 'う ', 3, 3)
-x(/\B/, ' い', 0, 0)
-x(/[たち]/, 'ち', 0, 2)
-n(/[なに]/, 'ぬ')
-x(/[う-お]/, 'え', 0, 2)
-n(/[^け]/, 'け')
-x(/[\w]/, 'ね', 0, 2)
-n(/[\d]/, 'ふ')
-x(/[\D]/, 'は', 0, 2)
-n(/[\s]/, 'く')
-x(/[\S]/, 'へ', 0, 2)
-x(/[\w\d]/, 'よ', 0, 2)
-x(/[\w\d]/, ' よ', 3, 5)
-#x(/[\xa4\xcf-\xa4\xd3]/, "\xa4\xd0", 0, 2) # diff spec with GNU regex.
-#n(/[\xb6\xe7-\xb6\xef]/, "\xb6\xe5") # diff spec with GNU regex.
-n(/\w鬼車/, ' 鬼車')
-x(/鬼\W車/, '鬼 車', 0, 5)
-x(/あ.い.う/, 'ああいいう', 0, 10)
-x(/.\wう\W..ぞ/, 'えうう うぞぞ', 0, 13)
-x(/\s\wこここ/, ' ここここ', 0, 9)
-x(/ああ.け/, 'ああけけ', 0, 8)
-n(/.い/, 'いえ')
-x(/.お/, 'おお', 0, 4)
-x(/^あ/, 'あ', 0, 2)
-x(/^む$/, 'む', 0, 2)
-x(/^\w$/, 'に', 0, 2)
-x(/^\wかきくけこ$/, 'zかきくけこ', 0, 11)
-x(/^\w...うえお$/, 'zあいううえお', 0, 13)
-x(/\w\w\s\Wおおお\d/, 'aお おおお4', 0, 12)
-x(/\Aたちつ/, 'たちつ', 0, 6)
-x(/むめも\Z/, 'むめも', 0, 6)
-x(/かきく\z/, 'かきく', 0, 6)
-x(/かきく\Z/, "かきく\n", 0, 6)
-x(/\Gぽぴ/, 'ぽぴ', 0, 4)
-n(/\Gえ/, 'うえお')
-n(/とて\G/, 'とて')
-n(/まみ\A/, 'まみ')
-n(/ま\Aみ/, 'まみ')
-x(/(?=せ)せ/, 'せ', 0, 2)
-n(/(?=う)./, 'い')
-x(/(?!う)か/, 'か', 0, 2)
-n(/(?!と)あ/, 'と')
-x(/(?i:あ)/, 'あ', 0, 2)
-x(/(?i:ぶべ)/, 'ぶべ', 0, 4)
-n(/(?i:い)/, 'う')
-x(/(?m:よ.)/, "よ\n", 0, 3)
-x(/(?m:.め)/, "ま\nめ", 2, 5)
-x(/あ?/, '', 0, 0)
-x(/変?/, '化', 0, 0)
-x(/変?/, '変', 0, 2)
-x(/量*/, '', 0, 0)
-x(/量*/, '量', 0, 2)
-x(/子*/, '子子子', 0, 6)
-x(/馬*/, '鹿馬馬馬馬', 0, 0)
-n(/山+/, '')
-x(/河+/, '河', 0, 2)
-x(/時+/, '時時時時', 0, 8)
-x(/え+/, 'ええううう', 0, 4)
-x(/う+/, 'おうううう', 2, 10)
-x(/.?/, 'た', 0, 2)
-x(/.*/, 'ぱぴぷぺ', 0, 8)
-x(/.+/, 'ろ', 0, 2)
-x(/.+/, "いうえか\n", 0, 8)
-x(/あ|い/, 'あ', 0, 2)
-x(/あ|い/, 'い', 0, 2)
-x(/あい|いう/, 'あい', 0, 4)
-x(/あい|いう/, 'いう', 0, 4)
-x(/を(?:かき|きく)/, 'をかき', 0, 6)
-x(/を(?:かき|きく)け/, 'をきくけ', 0, 8)
-x(/あい|(?:あう|あを)/, 'あを', 0, 4)
-x(/あ|い|う/, 'えう', 2, 4)
-x(/あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね/, 'しすせ', 0, 6)
-n(/あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね/, 'すせ')
-x(/あ|^わ/, 'ぶあ', 2, 4)
-x(/あ|^を/, 'をあ', 0, 2)
-x(/鬼|\G車/, 'け車鬼', 4, 6)
-x(/鬼|\G車/, '車鬼', 0, 2)
-x(/鬼|\A車/, 'b車鬼', 3, 5)
-x(/鬼|\A車/, '車', 0, 2)
-x(/鬼|車\Z/, '車鬼', 2, 4)
-x(/鬼|車\Z/, '車', 0, 2)
-x(/鬼|車\Z/, "車\n", 0, 2)
-x(/鬼|車\z/, '車鬼', 2, 4)
-x(/鬼|車\z/, '車', 0, 2)
-x(/\w|\s/, 'お', 0, 2)
-x(/\w|%/, '%お', 0, 1)
-x(/\w|[&$]/, 'う&', 0, 2)
-x(/[い-け]/, 'う', 0, 2)
-x(/[い-け]|[^か-こ]/, 'あ', 0, 2)
-x(/[い-け]|[^か-こ]/, 'か', 0, 2)
-x(/(?:あ|[う-き])|いを/, 'うを', 0, 2)
-x(/(?:あ|[う-き])|いを/, 'いを', 0, 4)
-x(/あいう|(?=けけ)..ほ/, 'けけほ', 0, 6)
-x(/あいう|(?!けけ)..ほ/, 'あいほ', 0, 6)
-x(/(?=をあ)..あ|(?=をを)..あ/, 'ををあ', 0, 6)
-x(/(?<=あ|いう)い/, 'いうい', 4, 6)
-n(/(?>あ|あいえ)う/, 'あいえう')
-x(/(?>あいえ|あ)う/, 'あいえう', 0, 8)
-x(/あ?|い/, 'あ', 0, 2)
-x(/あ?|い/, 'い', 0, 0)
-x(/あ?|い/, '', 0, 0)
-x(/あ*|い/, 'ああ', 0, 4)
-x(/あ*|い*/, 'いあ', 0, 0)
-x(/あ*|い*/, 'あい', 0, 2)
-x(/[aあ]*|い*/, 'aあいいい', 0, 3)
-x(/あ+|い*/, '', 0, 0)
-x(/あ+|い*/, 'いいい', 0, 6)
-x(/あ+|い*/, 'あいいい', 0, 2)
-x(/あ+|い*/, 'aあいいい', 0, 0)
-n(/あ+|い+/, '')
-x(/(あ|い)?/, 'い', 0, 2)
-x(/(あ|い)*/, 'いあ', 0, 4)
-x(/(あ|い)+/, 'いあい', 0, 6)
-x(/(あい|うあ)+/, 'うああいうえ', 0, 8)
-x(/(あい|うえ)+/, 'うああいうえ', 4, 12)
-x(/(あい|うあ)+/, 'ああいうあ', 2, 10)
-x(/(あい|うあ)+/, 'あいをうあ', 0, 4)
-x(/(あい|うあ)+/, '$$zzzzあいをうあ', 6, 10)
-x(/(あ|いあい)+/, 'あいあいあ', 0, 10)
-x(/(あ|いあい)+/, 'いあ', 2, 4)
-x(/(あ|いあい)+/, 'いあああいあ', 2, 8)
-x(/(?:あ|い)(?:あ|い)/, 'あい', 0, 4)
-x(/(?:あ*|い*)(?:あ*|い*)/, 'あああいいい', 0, 6)
-x(/(?:あ*|い*)(?:あ+|い+)/, 'あああいいい', 0, 12)
-x(/(?:あ+|い+){2}/, 'あああいいい', 0, 12)
-x(/(?:あ+|い+){1,2}/, 'あああいいい', 0, 12)
-x(/(?:あ+|\Aい*)うう/, 'うう', 0, 4)
-n(/(?:あ+|\Aい*)うう/, 'あいうう')
-x(/(?:^あ+|い+)*う/, 'ああいいいあいう', 12, 16)
-x(/(?:^あ+|い+)*う/, 'ああいいいいう', 0, 14)
-x(/う{0,}/, 'うううう', 0, 8)
-x(/あ|(?i)c/, 'C', 0, 1)
-x(/(?i)c|あ/, 'C', 0, 1)
-x(/(?i:あ)|a/, 'a', 0, 1)
-n(/(?i:あ)|a/, 'A')
-x(/[あいう]?/, 'あいう', 0, 2)
-x(/[あいう]*/, 'あいう', 0, 6)
-x(/[^あいう]*/, 'あいう', 0, 0)
-n(/[^あいう]+/, 'あいう')
-x(/あ??/, 'あああ', 0, 0)
-x(/いあ??い/, 'いあい', 0, 6)
-x(/あ*?/, 'あああ', 0, 0)
-x(/いあ*?/, 'いああ', 0, 2)
-x(/いあ*?い/, 'いああい', 0, 8)
-x(/あ+?/, 'あああ', 0, 2)
-x(/いあ+?/, 'いああ', 0, 4)
-x(/いあ+?い/, 'いああい', 0, 8)
-x(/(?:天?)??/, '天', 0, 0)
-x(/(?:天??)?/, '天', 0, 0)
-x(/(?:夢?)+?/, '夢夢夢', 0, 2)
-x(/(?:風+)??/, '風風風', 0, 0)
-x(/(?:雪+)??霜/, '雪雪雪霜', 0, 8)
-i(/(?:あい)?{2}/, '', 0, 0) # GNU regex bug
-x(/(?:鬼車)?{2}/, '鬼車鬼車鬼', 0, 8)
-x(/(?:鬼車)*{0}/, '鬼車鬼車鬼', 0, 0)
-x(/(?:鬼車){3,}/, '鬼車鬼車鬼車鬼車', 0, 16)
-n(/(?:鬼車){3,}/, '鬼車鬼車')
-x(/(?:鬼車){2,4}/, '鬼車鬼車鬼車', 0, 12)
-x(/(?:鬼車){2,4}/, '鬼車鬼車鬼車鬼車鬼車', 0, 16)
-x(/(?:鬼車){2,4}?/, '鬼車鬼車鬼車鬼車鬼車', 0, 8)
-x(/(?:鬼車){,}/, '鬼車{,}', 0, 7)
-x(/(?:かきく)+?{2}/, 'かきくかきくかきく', 0, 12)
-x(/(火)/, '火', 0, 2, 1)
-x(/(火水)/, '火水', 0, 4, 1)
-x(/((時間))/, '時間', 0, 4)
-x(/((風水))/, '風水', 0, 4, 1)
-x(/((昨日))/, '昨日', 0, 4, 2)
-x(/((((((((((((((((((((量子))))))))))))))))))))/, '量子', 0, 4, 20)
-x(/(あい)(うえ)/, 'あいうえ', 0, 4, 1)
-x(/(あい)(うえ)/, 'あいうえ', 4, 8, 2)
-x(/()(あ)いう(えおか)きくけこ/, 'あいうえおかきくけこ', 6, 12, 3)
-x(/(()(あ)いう(えおか)きくけこ)/, 'あいうえおかきくけこ', 6, 12, 4)
-x(/.*(フォ)ン・マ(ン()シュタ)イン/, 'フォン・マンシュタイン', 10, 18, 2)
-x(/(^あ)/, 'あ', 0, 2)
-x(/(あ)|(あ)/, 'いあ', 2, 4, 1)
-x(/(^あ)|(あ)/, 'いあ', 2, 4, 2)
-x(/(あ?)/, 'あああ', 0, 2, 1)
-x(/(ま*)/, 'ままま', 0, 6, 1)
-x(/(と*)/, '', 0, 0, 1)
-x(/(る+)/, 'るるるるるるる', 0, 14, 1)
-x(/(ふ+|へ*)/, 'ふふふへへ', 0, 6, 1)
-x(/(あ+|い?)/, 'いいいああ', 0, 2, 1)
-x(/(あいう)?/, 'あいう', 0, 6, 1)
-x(/(あいう)*/, 'あいう', 0, 6, 1)
-x(/(あいう)+/, 'あいう', 0, 6, 1)
-x(/(さしす|あいう)+/, 'あいう', 0, 6, 1)
-x(/([なにぬ][かきく]|かきく)+/, 'かきく', 0, 6, 1)
-x(/((?i:あいう))/, 'あいう', 0, 6, 1)
-x(/((?m:あ.う))/, "あ\nう", 0, 5, 1)
-x(/((?=あん)あ)/, 'あんい', 0, 2, 1)
-x(/あいう|(.あいえ)/, 'んあいえ', 0, 8, 1)
-x(/あ*(.)/, 'ああああん', 8, 10, 1)
-x(/あ*?(.)/, 'ああああん', 0, 2, 1)
-x(/あ*?(ん)/, 'ああああん', 8, 10, 1)
-x(/[いうえ]あ*(.)/, 'えああああん', 10, 12, 1)
-x(/(\Aいい)うう/, 'いいうう', 0, 4, 1)
-n(/(\Aいい)うう/, 'んいいうう')
-x(/(^いい)うう/, 'いいうう', 0, 4, 1)
-n(/(^いい)うう/, 'んいいうう')
-x(/ろろ(るる$)/, 'ろろるる', 4, 8, 1)
-n(/ろろ(るる$)/, 'ろろるるる')
-x(/(無)\1/, '無無', 0, 4)
-n(/(無)\1/, '無武')
-x(/(空?)\1/, '空空', 0, 4)
-x(/(空??)\1/, '空空', 0, 0)
-x(/(空*)\1/, '空空空空空', 0, 8)
-x(/(空*)\1/, '空空空空空', 0, 4, 1)
-x(/あ(い*)\1/, 'あいいいい', 0, 10)
-x(/あ(い*)\1/, 'あい', 0, 2)
-x(/(あ*)(い*)\1\2/, 'あああいいあああいい', 0, 20)
-x(/(あ*)(い*)\2/, 'あああいいいい', 0, 14)
-x(/(あ*)(い*)\2/, 'あああいいいい', 6, 10, 2)
-x(/(((((((ぽ*)ぺ))))))ぴ\7/, 'ぽぽぽぺぴぽぽぽ', 0, 16)
-x(/(((((((ぽ*)ぺ))))))ぴ\7/, 'ぽぽぽぺぴぽぽぽ', 0, 6, 7)
-x(/(は)(ひ)(ふ)\2\1\3/, 'はひふひはふ', 0, 12)
-x(/([き-け])\1/, 'くく', 0, 4)
-x(/(\w\d\s)\1/, 'あ5 あ5 ', 0, 8)
-n(/(\w\d\s)\1/, 'あ5 あ5')
-x(/(誰?|[あ-う]{3})\1/, '誰?誰?', 0, 8)
-x(/...(誰?|[あ-う]{3})\1/, 'あaあ誰?誰?', 0, 13)
-x(/(誰?|[あ-う]{3})\1/, 'ういうういう', 0, 12)
-x(/(^こ)\1/, 'ここ', 0, 4)
-n(/(^む)\1/, 'めむむ')
-n(/(あ$)\1/, 'ああ')
-n(/(あい\Z)\1/, 'あい')
-x(/(あ*\Z)\1/, 'あ', 2, 2)
-x(/.(あ*\Z)\1/, 'いあ', 2, 4)
-x(/(.(やいゆ)\2)/, 'zやいゆやいゆ', 0, 13, 1)
-x(/(.(..\d.)\2)/, 'あ12341234', 0, 10, 1)
-x(/((?i:あvず))\1/, 'あvずあvず', 0, 10)
-x(/(?<愚か>変|\(\g<愚か>\))/, '((((((変))))))', 0, 14)
-x(/\A(?:\g<阿-1>|\g<云-2>|\z終了 (?<阿-1>観|自\g<云-2>自)(?<云-2>在|菩薩\g<阿-1>菩薩))$/, '菩薩自菩薩自在自菩薩自菩薩', 0, 26)
-x(/[[ひふ]]/, 'ふ', 0, 2)
-x(/[[いおう]か]/, 'か', 0, 2)
-n(/[[^あ]]/, 'あ')
-n(/[^[あ]]/, 'あ')
-x(/[^[^あ]]/, 'あ', 0, 2)
-x(/[[かきく]&&きく]/, 'く', 0, 2)
-n(/[[かきく]&&きく]/, 'か')
-n(/[[かきく]&&きく]/, 'け')
-x(/[あ-ん&&い-を&&う-ゑ]/, 'ゑ', 0, 2)
-n(/[^あ-ん&&い-を&&う-ゑ]/, 'ゑ')
-x(/[[^あ&&あ]&&あ-ん]/, 'い', 0, 2)
-n(/[[^あ&&あ]&&あ-ん]/, 'あ')
-x(/[[^あ-ん&&いうえお]&&[^う-か]]/, 'き', 0, 2)
-n(/[[^あ-ん&&いうえお]&&[^う-か]]/, 'い')
-x(/[^[^あいう]&&[^うえお]]/, 'う', 0, 2)
-x(/[^[^あいう]&&[^うえお]]/, 'え', 0, 2)
-n(/[^[^あいう]&&[^うえお]]/, 'か')
-x(/[あ-&&-あ]/, '-', 0, 1)
-x(/[^[^a-zあいう]&&[^bcdefgうえお]q-w]/, 'え', 0, 2)
-x(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, 'f', 0, 1)
-x(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, 'g', 0, 1)
-n(/[^[^a-zあいう]&&[^bcdefgうえお]g-w]/, '2')
-r(/あ/, 'あ', 0)
-r(/あ/, 'あ', 0, 2)
-r(/い/, 'あいう', 2)
-r(/い/, 'あいう', 2, 4)
-r(/./, 'あ', 0)
-r(/.*/, 'あいうえお かきく', 17)
-r(/.*えお/, 'あいうえお かきく', 6)
-r(/あ*/, 'あああいいう', 12)
-r(/あ+/, 'あああいいう', 4)
-r(/あ?/, 'いあう', 6)
-r(/全??/, '負全変', 6)
-r(/a辺c漢e/, 'a辺c漢eavcd', 0)
-r(/\w\d\s/, ' あ2 うう $3 ', 2)
-r(/[う-お]ああ[と-ん]/, '3うああなうあああ', 1)
-r(/あ|い/, 'い', 0)
-r(/あい|いう|うえ/, 'いうう', 0)
-r(/(ととち)\1/, 'ととちととちととち', 6)
-r(/|え/, 'え', 2)
-r(/^あず/, 'あずあず', 0)
-r(/あず$/, 'あずあず', 4)
-r(/(((.あ)))\3/, 'zあzあああ', 0)
-r(/(あう*?ん)\1/, 'ああううんあううんあうん', 2)
-r(/ああん{3,4}/, 'ててああいいああんんんああんああん', 12)
-r(/\000あ/, "い\000あ", 2)
-r(/とと\xfe\xfe/, "ととと\xfe\xfe", 2)
-r(/...あいうえおかきくけこさしすせそ/, 'zzzzzあいうえおかきくけこさしすせそ', 2)
-end
-
-test_sb('ASCII')
-test_sb('EUC')
-test_sb('SJIS')
-test_sb('UTF8')
-test_euc('EUC')
-
-
-# UTF-8 (by UENO Katsuhiro)
-$KCODE = 'UTF-8'
-
-s = "\xe3\x81\x82\xe3\x81\x81\xf0\x90\x80\x85\xe3\x81\x8a\xe3\x81\x85"
-x(/[\xc2\x80-\xed\x9f\xbf]+/u, s, 0, 6)
-
-s = "\xf0\x90\x80\x85\xe3\x81\x82"
-x(/[\xc2\x80-\xed\x9f\xbf]/u, s, 4, 7)
-
-s = "\xed\x9f\xbf"
-n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
-
-s = "\xed\x9f\xbf"
-n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
-
-s = "\xed\x9f\xbf"
-n(/[\xc2\x80-\xed\x9f\xbe]/u, s)
-
-s = "\xed\x9f\xbf"
-n(/[\xc3\xad\xed\x9f\xbe]/u, s)
-
-s = "\xed\x9f\xbf"
-n(/[\xc4\x80-\xed\x9f\xbe]/u, s)
-
-s = "\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf"
-x(/[^\xc2\x80-\xed\x9f\xbe]/u, s, 0, 3)
-
-s = "\xed\x9f\xbf"
-x(/[^\xc3\xad\xed\x9f\xbe]/u, s, 0, 3)
-
-s = "\xed\x9f\xbf\xf0\x90\x80\x85\xed\x9f\xbf"
-x(/[^\xc4\x80-\xed\x9f\xbe]/u, s, 0, 3)
-
-s = "\xc3\xbe\xc3\xbf"
-n(/[\xfe\xff\xc3\x80]/u, s)
-
-
-# Japanese long text.
-$KCODE = 'EUC'
-
-s = <<EOS
-戦後の日本においては、旧軍については調査に基づかぬ批判も許される風潮も生じ、
-たとえば三十八年式歩兵銃の制定年が日露戦争の終った年であることをもって軽忽に
-旧軍の旧式ぶりを誇張する論評がまかりとおっている。
-有名な論者としては、故・司馬遼太郎を挙げることができるだろう。
-
-兵藤二十八 「有坂銃」 四谷ラウンド (1998)
-EOS
-
-x(/\((.+)\)/, s, 305, 309, 1)
-x(/司馬遼太郎/, s, 229, 239)
-x(/。$/, s, 202, 204)
-x(/(^兵藤..八)/, s, 269, 279, 1)
-x(/^$/, s, 268, 268)
-
-
-s = <<EOS
-カナやローマ字は一体文字であろうか。
-もしことばをしるすものが文字であるとすると、それはことばをしるすものではない。
-本やbookはことばであるが、ホンやhonは音をならべただけで、十分な単語性を
-もつものではない。
-単語としての特定の形態をもたないからである。
-「形による語」をアランは漢字に対する軽蔑的な意味に用いたが、
-形のないものは本当は語ではありえないのである。
-
-白川静 「漢字百話」
-EOS
-
-n(/\((.+)\)/, s)
-x(/「(.*)」/, s, 254, 264, 1)
-x(/。$/, s, 34, 36)
-x(/(book)/, s, 120, 124, 1)
-x(/^$/, s, 360, 360)
-
-
-s = <<EOS
-釈迦が叡山にくだってきたとすれば、そのおびただしい密教美術の量と、
-その質の高さにおどろくにちがいない。
-この覚者が、圧倒的な驚きをもつのは、お不動さんの像の前に立ったときだろう。
-−− これは、ドラヴィダ人の少年奴隷ではないか。
-
-司馬遼太郎 「叡山美術の展開−不動明王にふれつつ」 アサヒグラフ(1986)
-EOS
-
-x(/\((.+)\)/, s, 290, 296)
-x(/「(.*)−(.+)」/, s, 257, 275, 2)
-x(/^−− /, s, 179, 184)
-x(/(釈迦)/, s, 0, 4, 1)
-x(/\w、/, s, 30, 34)
-
-
-s = <<EOS
-かといって、所詮は、寺内君も、黒岩君も、そしてもう一人の人物も、口舌の徒にすぎないことを、この第七号は如実に物語っている。
-かれら三人の小説は一行も出ていないのだ。
-書くひまがなかったのであろう。
-しかし、雑誌「近代説話」が、なお第八号も第九号も出つづけてゆくであろうことについては、私はぶきみなほどの確信をもっている。この雑誌には、事務能力の魔物のような人物が、三人もいる。
-それを思うと、ときどきため息の出るようなおもいがするのである。
-
-司馬遼太郎 「こんな雑誌やめてしまいたい」 近代説話 第七集 (1961)
-EOS
-
-x(/\((\d+)\)/, s, 496, 502)
-x(/(「.+雑誌.*」)/, s, 449, 479, 1)
-x(/第(.)号/, s, 96, 98, 1)
-x(/。$/, s, 120, 122)
-x(/近代説話/, s, 209, 217)
-
-
-s = <<EOS
-二十五倍を越える莫大な量の下り塩に対抗する手立てに心づもりがあったのは、生き残っていた四十軒の地廻り塩問屋のうち伊勢屋の巴屋伊兵衛ただ一人だった。
-一口に地廻り塩といっても、江戸城御数寄屋に納入する御用塩と、江戸市中に流すものとは当然同じ物ではなかった。
-そもそもが戦物資を前提として考えられた行徳塩は、輸送する折に苦汁分が溶けだし目減りしたのでは話にならない。そこで、江戸城に納めるものは、焼きあげた塩を一夏葦簾囲いにした小屋に積み上げ、苦汁分を抜いて真塩に仕立て上げたものだった。
-
-飯嶋和一 「始祖鳥記」 (2000)
-EOS
-
-x(/\((\d+)\)/, s, 506, 512)
-x(/(「.*」)/, s, 493, 505, 1)
-x(/行徳塩/, s, 292, 298)
-
-
-s = <<EOS
-こうした日本人の武器に対する変わった態度の裏には、じつは、
-一貫した選択基準が働いていた。
-それは、その武器が「主兵を高級に見せるかどうか」であった。
-
-兵藤二十八 「有坂銃」 四谷ラウンド (1998)
-EOS
-
-x(/\((\d+)\)/, s, 185, 191)
-x(/(「.*」)/, s, 108, 138, 1)
-x(/^それは/, s, 90, 96)
-x(/^.*$/, s, 0, 58)
-
-s = <<EOS
- 稗は人も食い、馬の飼料にもしました。馬には稗一升に豆二合をたいてまぜたものを一日に一回はたべさせた。人間よりは上等のものをたべさせたもんであります。
- 人間は日頃はヘズリ飯をたべた。乾菜をゆでて、ゆでじるを馬にやり、菜をこまかに切り、菜と稗と米をまぜてたいてたべた。ずっと昔は米と稗が半々ぐらいであったが、明治も二十年代になると、稗をつくるのがへって来て、稗は米の三分の一くらいになった。ヘズリ飯には塩を少しいれたもんです。
-
-宮本常一 「忘れられた日本人」 (1960)
-EOS
-
-x(/(稗は米の三分の一くらいに)/, s, 357, 381, 1)
-x(/あります。$/, s, 140, 150)
-x(/ 人間(.*)。/, s, 157, 423, 1)
-x(/ヘズリ飯[をはで]/, s, 165, 175)
-
-s = <<EOS
-身はたとひ 武蔵の野辺に朽ぬとも 留置まし大和魂
-
-吉田松蔭 「留魂録」 (1859)
-EOS
-
-x(/\((.+)\)/, s, 68, 74)
-x(/「(.*)」/, s, 59, 65, 1)
-x(/^(吉田松蔭)/, s, 48, 56, 1)
-
-
-# result
-printf("\n*** Result SUCCESS: %d, FAIL: %d ***\n", $rok, $rfail)
-
-# END.
diff --git a/ext/mbstring/oniguruma/win32/testc.c b/ext/mbstring/oniguruma/testc.c
index 8ec392cd8c..e4d197e21d 100644
--- a/ext/mbstring/oniguruma/win32/testc.c
+++ b/ext/mbstring/oniguruma/testc.c
@@ -9,11 +9,14 @@
#include "oniguruma.h"
#endif
-static int nsucc = 0;
-static int nfail = 0;
+static int nsucc = 0;
+static int nfail = 0;
+static int nerror = 0;
+
+static FILE* err_file;
#ifndef POSIX_TEST
-static RegRegion* region;
+static OnigRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
@@ -28,15 +31,17 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
+ fprintf(err_file, "ERROR: %s\n", buf);
+ nerror++;
+ return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\n", buf);
- exit(-1);
+ fprintf(err_file, "ERROR: %s\n", buf);
+ nerror++;
+ return ;
}
if (r == REG_NOMATCH) {
@@ -70,28 +75,30 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
#else
regex_t* reg;
- RegErrorInfo einfo;
+ OnigErrorInfo einfo;
- r = regex_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
- REG_OPTION_DEFAULT, REGCODE_SJIS, REG_SYNTAX_DEFAULT, &einfo);
+ r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
+ ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r, &einfo);
- fprintf(stderr, "ERROR: %s\n", s);
- exit(-1);
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str(s, r, &einfo);
+ fprintf(err_file, "ERROR: %s\n", s);
+ nerror++;
+ return ;
}
- r = regex_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
- (UChar* )str, (UChar* )(str + strlen(str)),
- region, REG_OPTION_NONE);
- if (r < REG_MISMATCH) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r);
- fprintf(stderr, "ERROR: %s\n", s);
- exit(-1);
+ r = onig_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
+ (UChar* )str, (UChar* )(str + strlen(str)),
+ region, ONIG_OPTION_NONE);
+ if (r < ONIG_MISMATCH) {
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str(s, r);
+ fprintf(err_file, "ERROR: %s\n", s);
+ nerror++;
+ return ;
}
- if (r == REG_MISMATCH) {
+ if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
@@ -118,7 +125,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
}
}
}
- regex_free(reg);
+ onig_free(reg);
#endif
}
@@ -139,10 +146,12 @@ static void n(char* pattern, char* str)
extern int main(int argc, char* argv[])
{
+ err_file = stdout;
+
#ifdef POSIX_TEST
- reg_set_encoding(REG_ENCODING_SJIS);
+ reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
#else
- region = regex_region_new();
+ region = onig_region_new();
#endif
x2("", "", 0, 0);
@@ -168,6 +177,7 @@ extern int main(int argc, char* argv[])
x2("\\x1f", "\x1f", 0, 1);
x2("\\xFE", "\xfe", 0, 1);
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+ x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
x2(".", "a", 0, 1);
n(".", "");
x2("..", "ab", 0, 2);
@@ -189,10 +199,12 @@ extern int main(int argc, char* argv[])
x2("[^a]", "\n", 0, 1);
x2("[]]", "]", 0, 1);
n("[^]]", "]");
+ x2("[\\^]+", "0^^1", 1, 3);
x2("[b-]", "b", 0, 1);
x2("[b-]", "-", 0, 1);
x2("[\\w]", "z", 0, 1);
n("[\\w]", " ");
+ x2("[\\W]", "b$", 1, 2);
x2("[\\d]", "5", 0, 1);
n("[\\d]", "e");
x2("[\\D]", "t", 0, 1);
@@ -261,6 +273,8 @@ extern int main(int argc, char* argv[])
n("az\\A", "az");
n("a\\Az", "az");
x2("\\^\\$", "^$", 0, 2);
+ x2("^x?y", "xy", 0, 2);
+ x2("^(x?y)", "xy", 0, 2);
x2("\\w", "_", 0, 1);
n("\\W", "_");
x2("(?=z)z", "z", 0, 1);
@@ -275,12 +289,10 @@ extern int main(int argc, char* argv[])
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
n("(?i:[f-m])", "e");
- n("(?i:[b-C])", "A");
- x2("(?i:[a-C])", "B", 0, 1);
- n("(?i:[c-X])", "[");
- n("(?i:[!-k])", "Z");
+ x2("(?i:[A-c])", "D", 0, 1);
+ x2("(?i:[!-k])", "Z", 0, 1);
x2("(?i:[!-k])", "7", 0, 1);
- n("(?i:[T-}])", "b");
+ x2("(?i:[T-}])", "b", 0, 1);
x2("(?i:[T-}])", "{", 0, 1);
x2("(?i:\\?a)", "?A", 0, 2);
x2("(?i:\\*A)", "*a", 0, 2);
@@ -288,6 +300,8 @@ extern int main(int argc, char* argv[])
x2("(?m:.)", "\n", 0, 1);
x2("(?m:a.)", "a\n", 0, 2);
x2("(?m:.b)", "a\nb", 1, 3);
+ n("(?i)(?-i)a", "A");
+ n("(?i)(?-i:a)", "A");
x2("a?", "", 0, 0);
x2("a?", "b", 0, 0);
x2("a?", "a", 0, 1);
@@ -366,6 +380,10 @@ extern int main(int argc, char* argv[])
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
x2("h{0,}", "hhhh", 0, 4);
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+ n("ax{2}*a", "0axxxa1");
+ n("a.{0,2}a", "0aXXXa0");
+ n("a.{0,2}?a", "0aXXXa0");
+ n("a.{0,2}?a", "0aXXXXa0");
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
n("(?:a+|\\Ab*)cc", "abcc");
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
@@ -379,19 +397,19 @@ extern int main(int argc, char* argv[])
x2("[abc]*", "abc", 0, 3);
x2("[^abc]*", "abc", 0, 0);
n("[^abc]+", "abc");
- x2("a?\?", "aaa", 0, 0);
- x2("ba?\?b", "bab", 0, 3);
+ x2("a??", "aaa", 0, 0);
+ x2("ba??b", "bab", 0, 3);
x2("a*?", "aaa", 0, 0);
x2("ba*?", "baa", 0, 1);
x2("ba*?b", "baab", 0, 4);
x2("a+?", "aaa", 0, 1);
x2("ba+?", "baa", 0, 2);
x2("ba+?b", "baab", 0, 4);
- x2("(?:a?)?\?", "a", 0, 0);
- x2("(?:a?\?)?", "a", 0, 0);
+ x2("(?:a?)??", "a", 0, 0);
+ x2("(?:a??)?", "a", 0, 0);
x2("(?:a?)+?", "aaa", 0, 1);
- x2("(?:a+)?\?", "aaa", 0, 0);
- x2("(?:a+)?\?b", "aaab", 0, 4);
+ x2("(?:a+)??", "aaa", 0, 0);
+ x2("(?:a+)??b", "aaab", 0, 4);
x2("(?:ab)?{2}", "", 0, 0);
x2("(?:ab)?{2}", "ababa", 0, 4);
x2("(?:ab)*{0}", "ababa", 0, 0);
@@ -457,7 +475,7 @@ extern int main(int argc, char* argv[])
x2("(a)\\1", "aa", 0, 2);
n("(a)\\1", "ab");
x2("(a?)\\1", "aa", 0, 2);
- x2("(a?\?)\\1", "aa", 0, 0);
+ x2("(a??)\\1", "aa", 0, 0);
x2("(a*)\\1", "aaaaa", 0, 4);
x3("(a*)\\1", "aaaaa", 0, 2, 1);
x2("a(b*)\\1", "abbbb", 0, 5);
@@ -489,13 +507,14 @@ extern int main(int argc, char* argv[])
x2("(?<=a|bc)b", "bcb", 2, 3);
x2("(?<=a|bc)b", "ab", 1, 2);
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+ x2("(a)\\g<1>", "aa", 0, 2);
x2("(?<!a)b", "cb", 1, 2);
n("(?<!a)b", "ab");
x2("(?<!a|bc)b", "bbb", 0, 1);
n("(?<!a|bc)z", "bcz");
x2("(?<name1>a)", "a", 0, 1);
- x2("(?<name-2>ab)\\1", "abab", 0, 4);
- x2("(?<name-3>.zv.)\\k<name-3>", "azvbazvb", 0, 8);
+ x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+ x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
x2("(?<n>|a\\g<n>)+", "", 0, 0);
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
@@ -503,13 +522,13 @@ extern int main(int argc, char* argv[])
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
- x2("(?<@:name[1240]>\\w+\\sx)a+\\k<@:name[1240]>", " fg xaaaaaaaafg x", 2, 18);
- x3("(z)()()(?<9>a)\\4", "zaa", 1, 2, 4);
- x2("(.)(((?<*>a)))\\k<*>", "zaa", 0, 3);
+ x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
+ x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+ x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
- x2("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefgefg", 3, 9);
- n("(?:(?<@x>abc)|(?<@x>efg))\\k<@x>", "abcefg");
+ x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+ n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
@@ -518,286 +537,296 @@ extern int main(int argc, char* argv[])
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
- x3("\\g<2>\\g<1>|\\zEND(.a.)(?<?>.b.)", "xbxyay", 3, 6, 1);
+ x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
- x2("", "", 0, 0);
- x2("", "", 0, 2);
- n("", "");
- x2("", "", 0, 4);
- x2("", "", 0, 6);
- x2("", "", 0, 70);
- x2("", "", 2, 4);
- x2("", "", 2, 6);
+ x2("()*\\1", "", 0, 0);
+ x2("(?:()|())*\\1\\2", "", 0, 0);
+ x3("(?:\\1a|())*", "a", 0, 0, 1);
+ x2("x((.)*)*x", "0x1x2x3", 1, 6);
+ x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+ x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+ x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+ x2("", "あ", 0, 0);
+ x2("あ", "あ", 0, 2);
+ n("い", "あ");
+ x2("うう", "うう", 0, 4);
+ x2("あいう", "あいう", 0, 6);
+ x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
+ x2("あ", "いあ", 2, 4);
+ x2("いう", "あいう", 2, 6);
x2("\\xca\\xb8", "\xca\xb8", 0, 2);
- x2(".", "", 0, 2);
- x2("..", "", 0, 4);
- x2("\\w", "", 0, 2);
- n("\\W", "");
- x2("\\S", "", 0, 2);
- x2("\\S", "", 0, 2);
- x2("\\b", "C ", 0, 0);
- x2("\\b", " ", 1, 1);
- x2("\\B", " ", 2, 2);
- x2("\\B", " ", 3, 3);
- x2("\\B", " ", 0, 0);
- x2("[]", "", 0, 2);
- n("[]", "");
- x2("[-]", "", 0, 2);
- n("[^]", "");
- x2("[\\w]", "", 0, 2);
- n("[\\d]", "");
- x2("[\\D]", "", 0, 2);
- n("[\\s]", "");
- x2("[\\S]", "", 0, 2);
- x2("[\\w\\d]", "", 0, 2);
- x2("[\\w\\d]", " ", 3, 5);
- n("\\wSヤ", " Sヤ");
- x2("S\\Wヤ", "S ヤ", 0, 5);
- x2("..", "", 0, 10);
- x2(".\\w\\W..", " ", 0, 13);
- x2("\\s\\w", " ", 0, 9);
- x2(".", "", 0, 8);
- n(".", "");
- x2(".", "", 0, 4);
- x2("^", "", 0, 2);
- x2("^$", "", 0, 2);
- x2("^\\w$", "", 0, 2);
- x2("^\\w$", "z", 0, 11);
- x2("^\\w...$", "z", 0, 13);
- x2("\\w\\w\\s\\W\\d", "a 4", 0, 12);
- x2("\\A", "", 0, 6);
- x2("\\Z", "", 0, 6);
- x2("\\z", "", 0, 6);
- x2("\\Z", "\n", 0, 6);
- x2("\\G", "", 0, 4);
- n("\\G", "");
- n("\\G", "");
- n("\\A", "");
- n("\\A", "");
- x2("(?=)", "", 0, 2);
- n("(?=).", "");
- x2("(?!)", "", 0, 2);
- n("(?!)", "");
- x2("(?i:)", "", 0, 2);
- x2("(?i:)", "", 0, 4);
- n("(?i:)", "");
- x2("(?m:.)", "\n", 0, 3);
- x2("(?m:.)", "\n", 2, 5);
- x2("?", "", 0, 0);
- x2("?", "", 0, 0);
- x2("?", "", 0, 2);
- x2("*", "", 0, 0);
- x2("*", "", 0, 2);
- x2("q*", "qqq", 0, 6);
- x2("n*", "ュnnnn", 0, 0);
- n("R+", "");
- x2("+", "", 0, 2);
- x2("+", "", 0, 8);
- x2("+", "", 0, 4);
- x2("+", "", 2, 10);
- x2(".?", "", 0, 2);
- x2(".*", "", 0, 8);
- x2(".+", "", 0, 2);
- x2(".+", "\n", 0, 8);
- x2("|", "", 0, 2);
- x2("|", "", 0, 2);
- x2("|", "", 0, 4);
- x2("|", "", 0, 4);
- x2("(?:|)", "", 0, 6);
- x2("(?:|)", "", 0, 8);
- x2("|(?:|)", "", 0, 4);
- x2("||", "", 2, 4);
- x2("||||||||||", "", 0, 6);
- n("||||||||||", "");
- x2("|^", "", 2, 4);
- x2("|^", "", 0, 2);
- x2("S|\\Gヤ", "ヤS", 4, 6);
- x2("S|\\Gヤ", "ヤS", 0, 2);
- x2("S|\\Aヤ", "bヤS", 3, 5);
- x2("S|\\Aヤ", "ヤ", 0, 2);
- x2("S|ヤ\\Z", "ヤS", 2, 4);
- x2("S|ヤ\\Z", "ヤ", 0, 2);
- x2("S|ヤ\\Z", "ヤ\n", 0, 2);
- x2("S|ヤ\\z", "ヤS", 2, 4);
- x2("S|ヤ\\z", "ヤ", 0, 2);
- x2("\\w|\\s", "", 0, 2);
- x2("\\w|%", "%", 0, 1);
- x2("\\w|[&$]", "&", 0, 2);
- x2("[-]", "", 0, 2);
- x2("[-]|[^-]", "", 0, 2);
- x2("[-]|[^-]", "", 0, 2);
- x2("(?:|[-])|", "", 0, 2);
- x2("(?:|[-])|", "", 0, 4);
- x2("|(?=)..", "", 0, 6);
- x2("|(?!)..", "", 0, 6);
- x2("(?=)..|(?=)..", "", 0, 6);
- x2("(?<=|)", "", 4, 6);
- n("(?>|)", "");
- x2("(?>|)", "", 0, 8);
- x2("?|", "", 0, 2);
- x2("?|", "", 0, 0);
- x2("?|", "", 0, 0);
- x2("*|", "", 0, 4);
- x2("*|*", "", 0, 0);
- x2("*|*", "", 0, 2);
- x2("[a]*|*", "a", 0, 3);
- x2("+|*", "", 0, 0);
- x2("+|*", "", 0, 6);
- x2("+|*", "", 0, 2);
- x2("+|*", "a", 0, 0);
- n("+|+", "");
- x2("(|)?", "", 0, 2);
- x2("(|)*", "", 0, 4);
- x2("(|)+", "", 0, 6);
- x2("(|)+", "", 0, 8);
- x2("(|)+", "", 4, 12);
- x2("(|)+", "", 2, 10);
- x2("(|)+", "", 0, 4);
- x2("(|)+", "$$zzzz", 6, 10);
- x2("(|)+", "", 0, 10);
- x2("(|)+", "", 2, 4);
- x2("(|)+", "", 2, 8);
- x2("(?:|)(?:|)", "", 0, 4);
- x2("(?:*|*)(?:*|*)", "", 0, 6);
- x2("(?:*|*)(?:+|+)", "", 0, 12);
- x2("(?:+|+){2}", "", 0, 12);
- x2("(?:+|+){1,2}", "", 0, 12);
- x2("(?:+|\\A*)", "", 0, 4);
- n("(?:+|\\A*)", "");
- x2("(?:^+|+)*", "", 12, 16);
- x2("(?:^+|+)*", "", 0, 14);
- x2("{0,}", "", 0, 8);
- x2("|(?i)c", "C", 0, 1);
- x2("(?i)c|", "C", 0, 1);
- x2("(?i:)|a", "a", 0, 1);
- n("(?i:)|a", "A");
- x2("[]?", "", 0, 2);
- x2("[]*", "", 0, 6);
- x2("[^]*", "", 0, 0);
- n("[^]+", "");
- x2("?\?", "", 0, 0);
- x2("?\?", "", 0, 6);
- x2("*?", "", 0, 0);
- x2("*?", "", 0, 2);
- x2("*?", "", 0, 8);
- x2("+?", "", 0, 2);
- x2("+?", "", 0, 4);
- x2("+?", "", 0, 8);
- x2("(?:V?)?\?", "V", 0, 0);
- x2("(?:V?\?)?", "V", 0, 0);
- x2("(?:?)+?", "", 0, 2);
- x2("(?:+)?\?", "", 0, 0);
- x2("(?:+)?\?", "", 0, 8);
- x2("(?:)?{2}", "", 0, 0);
- x2("(?:Sヤ)?{2}", "SヤSヤS", 0, 8);
- x2("(?:Sヤ)*{0}", "SヤSヤS", 0, 0);
- x2("(?:Sヤ){3,}", "SヤSヤSヤSヤ", 0, 16);
- n("(?:Sヤ){3,}", "SヤSヤ");
- x2("(?:Sヤ){2,4}", "SヤSヤSヤ", 0, 12);
- x2("(?:Sヤ){2,4}", "SヤSヤSヤSヤSヤ", 0, 16);
- x2("(?:Sヤ){2,4}?", "SヤSヤSヤSヤSヤ", 0, 8);
- x2("(?:Sヤ){,}", "Sヤ{,}", 0, 7);
- x2("(?:)+?{2}", "", 0, 12);
- x3("()", "", 0, 2, 1);
- x3("()", "", 0, 4, 1);
- x2("(())", "", 0, 4);
- x3("(())", "", 0, 4, 1);
- x3("(())", "", 0, 4, 2);
- x3("((((((((((((((((((((q))))))))))))))))))))", "q", 0, 4, 20);
- x3("()()", "", 0, 4, 1);
- x3("()()", "", 4, 8, 2);
- x3("()()()", "", 6, 12, 3);
- x3("(()()())", "", 6, 12, 4);
- x3(".*(tH)E}(()V^)C", "tHE}V^C", 10, 18, 2);
- x2("(^)", "", 0, 2);
- x3("()|()", "", 2, 4, 1);
- x3("(^)|()", "", 2, 4, 2);
- x3("(?)", "", 0, 2, 1);
- x3("(*)", "", 0, 6, 1);
- x3("(*)", "", 0, 0, 1);
- x3("(+)", "", 0, 14, 1);
- x3("(+|*)", "", 0, 6, 1);
- x3("(+|?)", "", 0, 2, 1);
- x3("()?", "", 0, 6, 1);
- x3("()*", "", 0, 6, 1);
- x3("()+", "", 0, 6, 1);
- x3("(|)+", "", 0, 6, 1);
- x3("([][]|)+", "", 0, 6, 1);
- x3("((?i:))", "", 0, 6, 1);
- x3("((?m:.))", "\n", 0, 5, 1);
- x3("((?=))", "", 0, 2, 1);
- x3("|(.)", "", 0, 8, 1);
- x3("*(.)", "", 8, 10, 1);
- x3("*?(.)", "", 0, 2, 1);
- x3("*?()", "", 8, 10, 1);
- x3("[]*(.)", "", 10, 12, 1);
- x3("(\\A)", "", 0, 4, 1);
- n("(\\A)", "");
- x3("(^)", "", 0, 4, 1);
- n("(^)", "");
- x3("($)", "", 4, 8, 1);
- n("($)", "");
- x2("()\\1", "", 0, 4);
- n("()\\1", "");
- x2("(?)\\1", "", 0, 4);
- x2("(?\?)\\1", "", 0, 0);
- x2("(*)\\1", "", 0, 8);
- x3("(*)\\1", "", 0, 4, 1);
- x2("(*)\\1", "", 0, 10);
- x2("(*)\\1", "", 0, 2);
- x2("(*)(*)\\1\\2", "", 0, 20);
- x2("(*)(*)\\2", "", 0, 14);
- x3("(*)(*)\\2", "", 6, 10, 2);
- x2("(((((((*)))))))\\7", "", 0, 16);
- x3("(((((((*)))))))\\7", "", 0, 6, 7);
- x2("()()()\\2\\1\\3", "", 0, 12);
- x2("([-])\\1", "", 0, 4);
- x2("(\\w\\d\\s)\\1", "5 5 ", 0, 8);
- n("(\\w\\d\\s)\\1", "5 5");
- x2("(NH|[-]{3})\\1", "NHNH", 0, 8);
- x2("...(NH|[-]{3})\\1", "aNHNH", 0, 13);
- x2("(NH|[-]{3})\\1", "", 0, 12);
- x2("(^)\\1", "", 0, 4);
- n("(^)\\1", "");
- n("($)\\1", "");
- n("(\\Z)\\1", "");
- x2("(*\\Z)\\1", "", 2, 2);
- x2(".(*\\Z)\\1", "", 2, 4);
- x3("(.()\\2)", "z", 0, 13, 1);
- x3("(.(..\\d.)\\2)", "12341234", 0, 10, 1);
- x2("((?i:v))\\1", "vv", 0, 10);
- x2("(?<>|\\(\\g<>\\))", "(((((())))))", 0, 14);
- x2("\\A(?:\\g<-1>|\\g<]-2>|\\zI (?<-1>|ゥ\\g<]-2>ゥ)(?<]-2>|F\\g<-1>F))$", "FゥFゥゥFゥF", 0, 26);
- x2("[[]]", "", 0, 2);
- x2("[[]]", "", 0, 2);
- n("[[^]]", "");
- n("[^[]]", "");
- x2("[^[^]]", "", 0, 2);
- x2("[[]&&]", "", 0, 2);
- n("[[]&&]", "");
- n("[[]&&]", "");
- x2("[-&&-&&-]", "", 0, 2);
- n("[^-&&-&&-]", "");
- x2("[[^&&]&&-]", "", 0, 2);
- n("[[^&&]&&-]", "");
- x2("[[^-&&]&&[^-]]", "", 0, 2);
- n("[[^-&&]&&[^-]]", "");
- x2("[^[^]&&[^]]", "", 0, 2);
- x2("[^[^]&&[^]]", "", 0, 2);
- n("[^[^]&&[^]]", "");
- x2("[-&&-]", "-", 0, 1);
- x2("[^[^a-z]&&[^bcdefg]q-w]", "", 0, 2);
- x2("[^[^a-z]&&[^bcdefg]g-w]", "f", 0, 1);
- x2("[^[^a-z]&&[^bcdefg]g-w]", "g", 0, 1);
- n("[^[^a-z]&&[^bcdefg]g-w]", "2");
- fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail);
+ x2(".", "あ", 0, 2);
+ x2("..", "かき", 0, 4);
+ x2("\\w", "お", 0, 2);
+ n("\\W", "あ");
+ x2("[\\W]", "う$", 2, 3);
+ x2("\\S", "そ", 0, 2);
+ x2("\\S", "漢", 0, 2);
+ x2("\\b", "気 ", 0, 0);
+ x2("\\b", " ほ", 1, 1);
+ x2("\\B", "せそ ", 2, 2);
+ x2("\\B", "う ", 3, 3);
+ x2("\\B", " い", 0, 0);
+ x2("[たち]", "ち", 0, 2);
+ n("[なに]", "ぬ");
+ x2("[う-お]", "え", 0, 2);
+ n("[^け]", "け");
+ x2("[\\w]", "ね", 0, 2);
+ n("[\\d]", "ふ");
+ x2("[\\D]", "は", 0, 2);
+ n("[\\s]", "く");
+ x2("[\\S]", "へ", 0, 2);
+ x2("[\\w\\d]", "よ", 0, 2);
+ x2("[\\w\\d]", " よ", 3, 5);
+ n("\\w鬼車", " 鬼車");
+ x2("鬼\\W車", "鬼 車", 0, 5);
+ x2("あ.い.う", "ああいいう", 0, 10);
+ x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
+ x2("\\s\\wこここ", " ここここ", 0, 9);
+ x2("ああ.け", "ああけけ", 0, 8);
+ n(".い", "いえ");
+ x2(".お", "おお", 0, 4);
+ x2("^あ", "あ", 0, 2);
+ x2("^む$", "む", 0, 2);
+ x2("^\\w$", "に", 0, 2);
+ x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
+ x2("^\\w...うえお$", "zあいううえお", 0, 13);
+ x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
+ x2("\\Aたちつ", "たちつ", 0, 6);
+ x2("むめも\\Z", "むめも", 0, 6);
+ x2("かきく\\z", "かきく", 0, 6);
+ x2("かきく\\Z", "かきく\n", 0, 6);
+ x2("\\Gぽぴ", "ぽぴ", 0, 4);
+ n("\\Gえ", "うえお");
+ n("とて\\G", "とて");
+ n("まみ\\A", "まみ");
+ n("ま\\Aみ", "まみ");
+ x2("(?=せ)せ", "せ", 0, 2);
+ n("(?=う).", "い");
+ x2("(?!う)か", "か", 0, 2);
+ n("(?!と)あ", "と");
+ x2("(?i:あ)", "あ", 0, 2);
+ x2("(?i:ぶべ)", "ぶべ", 0, 4);
+ n("(?i:い)", "う");
+ x2("(?m:よ.)", "よ\n", 0, 3);
+ x2("(?m:.め)", "ま\nめ", 2, 5);
+ x2("あ?", "", 0, 0);
+ x2("変?", "化", 0, 0);
+ x2("変?", "変", 0, 2);
+ x2("量*", "", 0, 0);
+ x2("量*", "量", 0, 2);
+ x2("子*", "子子子", 0, 6);
+ x2("馬*", "鹿馬馬馬馬", 0, 0);
+ n("山+", "");
+ x2("河+", "河", 0, 2);
+ x2("時+", "時時時時", 0, 8);
+ x2("え+", "ええううう", 0, 4);
+ x2("う+", "おうううう", 2, 10);
+ x2(".?", "た", 0, 2);
+ x2(".*", "ぱぴぷぺ", 0, 8);
+ x2(".+", "ろ", 0, 2);
+ x2(".+", "いうえか\n", 0, 8);
+ x2("あ|い", "あ", 0, 2);
+ x2("あ|い", "い", 0, 2);
+ x2("あい|いう", "あい", 0, 4);
+ x2("あい|いう", "いう", 0, 4);
+ x2("を(?:かき|きく)", "をかき", 0, 6);
+ x2("を(?:かき|きく)け", "をきくけ", 0, 8);
+ x2("あい|(?:あう|あを)", "あを", 0, 4);
+ x2("あ|い|う", "えう", 2, 4);
+ x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
+ n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
+ x2("あ|^わ", "ぶあ", 2, 4);
+ x2("あ|^を", "をあ", 0, 2);
+ x2("鬼|\\G車", "け車鬼", 4, 6);
+ x2("鬼|\\G車", "車鬼", 0, 2);
+ x2("鬼|\\A車", "b車鬼", 3, 5);
+ x2("鬼|\\A車", "車", 0, 2);
+ x2("鬼|車\\Z", "車鬼", 2, 4);
+ x2("鬼|車\\Z", "車", 0, 2);
+ x2("鬼|車\\Z", "車\n", 0, 2);
+ x2("鬼|車\\z", "車鬼", 2, 4);
+ x2("鬼|車\\z", "車", 0, 2);
+ x2("\\w|\\s", "お", 0, 2);
+ x2("\\w|%", "%お", 0, 1);
+ x2("\\w|[&$]", "う&", 0, 2);
+ x2("[い-け]", "う", 0, 2);
+ x2("[い-け]|[^か-こ]", "あ", 0, 2);
+ x2("[い-け]|[^か-こ]", "か", 0, 2);
+ x2("[^あ]", "\n", 0, 1);
+ x2("(?:あ|[う-き])|いを", "うを", 0, 2);
+ x2("(?:あ|[う-き])|いを", "いを", 0, 4);
+ x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
+ x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
+ x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
+ x2("(?<=あ|いう)い", "いうい", 4, 6);
+ n("(?>あ|あいえ)う", "あいえう");
+ x2("(?>あいえ|あ)う", "あいえう", 0, 8);
+ x2("あ?|い", "あ", 0, 2);
+ x2("あ?|い", "い", 0, 0);
+ x2("あ?|い", "", 0, 0);
+ x2("あ*|い", "ああ", 0, 4);
+ x2("あ*|い*", "いあ", 0, 0);
+ x2("あ*|い*", "あい", 0, 2);
+ x2("[aあ]*|い*", "aあいいい", 0, 3);
+ x2("あ+|い*", "", 0, 0);
+ x2("あ+|い*", "いいい", 0, 6);
+ x2("あ+|い*", "あいいい", 0, 2);
+ x2("あ+|い*", "aあいいい", 0, 0);
+ n("あ+|い+", "");
+ x2("(あ|い)?", "い", 0, 2);
+ x2("(あ|い)*", "いあ", 0, 4);
+ x2("(あ|い)+", "いあい", 0, 6);
+ x2("(あい|うあ)+", "うああいうえ", 0, 8);
+ x2("(あい|うえ)+", "うああいうえ", 4, 12);
+ x2("(あい|うあ)+", "ああいうあ", 2, 10);
+ x2("(あい|うあ)+", "あいをうあ", 0, 4);
+ x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
+ x2("(あ|いあい)+", "あいあいあ", 0, 10);
+ x2("(あ|いあい)+", "いあ", 2, 4);
+ x2("(あ|いあい)+", "いあああいあ", 2, 8);
+ x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
+ x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
+ x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
+ x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
+ x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
+ x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
+ n("(?:あ+|\\Aい*)うう", "あいうう");
+ x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
+ x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
+ x2("う{0,}", "うううう", 0, 8);
+ x2("あ|(?i)c", "C", 0, 1);
+ x2("(?i)c|あ", "C", 0, 1);
+ x2("(?i:あ)|a", "a", 0, 1);
+ n("(?i:あ)|a", "A");
+ x2("[あいう]?", "あいう", 0, 2);
+ x2("[あいう]*", "あいう", 0, 6);
+ x2("[^あいう]*", "あいう", 0, 0);
+ n("[^あいう]+", "あいう");
+ x2("あ??", "あああ", 0, 0);
+ x2("いあ??い", "いあい", 0, 6);
+ x2("あ*?", "あああ", 0, 0);
+ x2("いあ*?", "いああ", 0, 2);
+ x2("いあ*?い", "いああい", 0, 8);
+ x2("あ+?", "あああ", 0, 2);
+ x2("いあ+?", "いああ", 0, 4);
+ x2("いあ+?い", "いああい", 0, 8);
+ x2("(?:天?)??", "天", 0, 0);
+ x2("(?:天??)?", "天", 0, 0);
+ x2("(?:夢?)+?", "夢夢夢", 0, 2);
+ x2("(?:風+)??", "風風風", 0, 0);
+ x2("(?:雪+)??霜", "雪雪雪霜", 0, 8);
+ x2("(?:あい)?{2}", "", 0, 0);
+ x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
+ x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
+ x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
+ n("(?:鬼車){3,}", "鬼車鬼車");
+ x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
+ x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
+ x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
+ x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
+ x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
+ x3("(火)", "火", 0, 2, 1);
+ x3("(火水)", "火水", 0, 4, 1);
+ x2("((時間))", "時間", 0, 4);
+ x3("((風水))", "風水", 0, 4, 1);
+ x3("((昨日))", "昨日", 0, 4, 2);
+ x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
+ x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
+ x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
+ x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
+ x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
+ x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
+ x2("(^あ)", "あ", 0, 2);
+ x3("(あ)|(あ)", "いあ", 2, 4, 1);
+ x3("(^あ)|(あ)", "いあ", 2, 4, 2);
+ x3("(あ?)", "あああ", 0, 2, 1);
+ x3("(ま*)", "ままま", 0, 6, 1);
+ x3("(と*)", "", 0, 0, 1);
+ x3("(る+)", "るるるるるるる", 0, 14, 1);
+ x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
+ x3("(あ+|い?)", "いいいああ", 0, 2, 1);
+ x3("(あいう)?", "あいう", 0, 6, 1);
+ x3("(あいう)*", "あいう", 0, 6, 1);
+ x3("(あいう)+", "あいう", 0, 6, 1);
+ x3("(さしす|あいう)+", "あいう", 0, 6, 1);
+ x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
+ x3("((?i:あいう))", "あいう", 0, 6, 1);
+ x3("((?m:あ.う))", "あ\nう", 0, 5, 1);
+ x3("((?=あん)あ)", "あんい", 0, 2, 1);
+ x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
+ x3("あ*(.)", "ああああん", 8, 10, 1);
+ x3("あ*?(.)", "ああああん", 0, 2, 1);
+ x3("あ*?(ん)", "ああああん", 8, 10, 1);
+ x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
+ x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
+ n("(\\Aいい)うう", "んいいうう");
+ x3("(^いい)うう", "いいうう", 0, 4, 1);
+ n("(^いい)うう", "んいいうう");
+ x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
+ n("ろろ(るる$)", "ろろるるる");
+ x2("(無)\\1", "無無", 0, 4);
+ n("(無)\\1", "無武");
+ x2("(空?)\\1", "空空", 0, 4);
+ x2("(空??)\\1", "空空", 0, 0);
+ x2("(空*)\\1", "空空空空空", 0, 8);
+ x3("(空*)\\1", "空空空空空", 0, 4, 1);
+ x2("あ(い*)\\1", "あいいいい", 0, 10);
+ x2("あ(い*)\\1", "あい", 0, 2);
+ x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
+ x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
+ x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
+ x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
+ x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
+ x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
+ x2("([き-け])\\1", "くく", 0, 4);
+ x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
+ n("(\\w\\d\\s)\\1", "あ5 あ5");
+ x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
+ x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 13);
+ x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
+ x2("(^こ)\\1", "ここ", 0, 4);
+ n("(^む)\\1", "めむむ");
+ n("(あ$)\\1", "ああ");
+ n("(あい\\Z)\\1", "あい");
+ x2("(あ*\\Z)\\1", "あ", 2, 2);
+ x2(".(あ*\\Z)\\1", "いあ", 2, 4);
+ x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
+ x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
+ x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
+ x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
+ x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
+ x2("[[ひふ]]", "ふ", 0, 2);
+ x2("[[いおう]か]", "か", 0, 2);
+ n("[[^あ]]", "あ");
+ n("[^[あ]]", "あ");
+ x2("[^[^あ]]", "あ", 0, 2);
+ x2("[[かきく]&&きく]", "く", 0, 2);
+ n("[[かきく]&&きく]", "か");
+ n("[[かきく]&&きく]", "け");
+ x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2);
+ n("[^あ-ん&&い-を&&う-ゑ]", "ゑ");
+ x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2);
+ n("[[^あ&&あ]&&あ-ん]", "あ");
+ x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2);
+ n("[[^あ-ん&&いうえお]&&[^う-か]]", "い");
+ x2("[^[^あいう]&&[^うえお]]", "う", 0, 2);
+ x2("[^[^あいう]&&[^うえお]]", "え", 0, 2);
+ n("[^[^あいう]&&[^うえお]]", "か");
+ x2("[あ-&&-あ]", "-", 0, 1);
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2);
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
+ n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
+ fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d\n",
+ nsucc, nfail, nerror);
#ifndef POSIX_TEST
- regex_region_free(region, 1);
- regex_end();
+ onig_region_free(region, 1);
+ onig_end();
#endif
return 0;
diff --git a/ext/mbstring/oniguruma/testconv.rb b/ext/mbstring/oniguruma/testconv.rb
deleted file mode 100644
index afaa673d90..0000000000
--- a/ext/mbstring/oniguruma/testconv.rb
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/local/bin/ruby -Ke
-# testconv.rb
-# Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
-
-WINDOWS = (ARGV.size > 0 && /^-win/i =~ ARGV[0])
-ARGV.shift if WINDOWS
-
-if WINDOWS
- REGCODE = 'REGCODE_SJIS'
- REGENC = 'REG_ENCODING_SJIS'
-else
- REGCODE = 'REGCODE_EUCJP'
- REGENC = 'REG_ENCODING_EUC_JP'
-end
-
-def conv_reg(s)
- s = s.gsub(/\\/, '\\\\\\\\') #'
- if (WINDOWS)
- s = s.gsub(/\?\?/, '?\\\\?') # escape ANSI trigraph
- end
- s
-end
-
-def conv_str(s)
- if (s[0] == ?')
- s = s[1..-2]
- return s.gsub(/\\/, '\\\\\\\\') #'
- else
- return s[1..-2]
- end
-end
-
-print(<<"EOS")
-/*
- * This program was generated by testconv.rb.
- */
-#include<stdio.h>
-
-#ifdef POSIX_TEST
-#include "onigposix.h"
-#else
-#include "oniguruma.h"
-#endif
-
-static int nsucc = 0;
-static int nfail = 0;
-
-#ifndef POSIX_TEST
-static RegRegion* region;
-#endif
-
-static void xx(char* pattern, char* str, int from, int to, int mem, int not)
-{
- int r;
-
-#ifdef POSIX_TEST
- regex_t reg;
- char buf[200];
- regmatch_t pmatch[20];
-
- r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\\n", buf);
- exit(-1);
- }
-
- r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
- if (r != 0 && r != REG_NOMATCH) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(stderr, "ERROR: %s\\n", buf);
- exit(-1);
- }
-
- if (r == REG_NOMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str);
- nfail++;
- }
- else {
- if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
- fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str,
- from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
- nfail++;
- }
- }
- }
- regfree(&reg);
-
-#else
- regex_t* reg;
- RegErrorInfo einfo;
-
- r = regex_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
- REG_OPTION_DEFAULT, #{REGCODE}, REG_SYNTAX_DEFAULT, &einfo);
- if (r) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r, &einfo);
- fprintf(stderr, "ERROR: %s\\n", s);
- exit(-1);
- }
-
- r = regex_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
- (UChar* )str, (UChar* )(str + strlen(str)),
- region, REG_OPTION_NONE);
- if (r < REG_MISMATCH) {
- char s[REG_MAX_ERROR_MESSAGE_LEN];
- regex_error_code_to_str(s, r);
- fprintf(stderr, "ERROR: %s\\n", s);
- exit(-1);
- }
-
- if (r == REG_MISMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\\n", pattern, str);
- nfail++;
- }
- else {
- if (region->beg[mem] == from && region->end[mem] == to) {
- fprintf(stdout, "OK: /%s/ '%s'\\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\\n", pattern, str,
- from, to, region->beg[mem], region->end[mem]);
- nfail++;
- }
- }
- }
- regex_free(reg);
-#endif
-}
-
-static void x2(char* pattern, char* str, int from, int to)
-{
- xx(pattern, str, from, to, 0, 0);
-}
-
-static void x3(char* pattern, char* str, int from, int to, int mem)
-{
- xx(pattern, str, from, to, mem, 0);
-}
-
-static void n(char* pattern, char* str)
-{
- xx(pattern, str, 0, 0, 0, 1);
-}
-
-extern int main(int argc, char* argv[])
-{
-#ifdef POSIX_TEST
- reg_set_encoding(#{REGENC});
-#else
- region = regex_region_new();
-#endif
-
-EOS
-
-CM = '\s*,\s*'
-RX2 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)$}
-RI2 = %r{^i\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)\)}
-RX3 = %r{^x\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")#{CM}(\S+)#{CM}(\S+)#{CM}(\S+)\)$}
-RN = %r{^n\(/([^\/]*)/#{CM}('[^']*'|"[^"]*")\)$} #'
-
-while line = gets()
- if (m = RX2.match(line))
- reg = conv_reg(m[1])
- str = conv_str(m[2])
- printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, m[3], m[4])
- elsif (m = RI2.match(line))
- reg = conv_reg(m[1])
- str = conv_str(m[2])
- printf(" x2(\"%s\", \"%s\", %s, %s);\n", reg, str, m[3], m[4])
- elsif (m = RX3.match(line))
- reg = conv_reg(m[1])
- str = conv_str(m[2])
- printf(" x3(\"%s\", \"%s\", %s, %s, %s);\n", reg, str, m[3], m[4], m[5])
- elsif (m = RN.match(line))
- reg = conv_reg(m[1])
- str = conv_str(m[2])
- printf(" n(\"%s\", \"%s\");\n", reg, str)
- else
-
- end
-end
-
-print(<<'EOS')
- fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d\n", nsucc, nfail);
-
-#ifndef POSIX_TEST
- regex_region_free(region, 1);
- regex_end();
-#endif
-
- return 0;
-}
-EOS
-
-# END OF SCRIPT
diff --git a/ext/mbstring/oniguruma/win32/Makefile b/ext/mbstring/oniguruma/win32/Makefile
deleted file mode 100644
index bb20474e8f..0000000000
--- a/ext/mbstring/oniguruma/win32/Makefile
+++ /dev/null
@@ -1,131 +0,0 @@
-# Oniguruma Makefile for Win32
-
-product_name = oniguruma
-
-CPPFLAGS =
-CFLAGS = -O2 -nologo
-LDFLAGS =
-LOADLIBES =
-ARLIB = lib
-ARLIB_FLAGS = -nologo
-ARDLL = cl
-ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
-LINKFLAGS = -link -incremental:no -pdb:none
-
-INSTALL = install -c
-CP = copy
-CC = cl
-DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
-RUBYDIR = ..
-
-subdirs =
-
-libbase = onig
-libname = $(libbase)_s.lib
-dllname = $(libbase).dll
-dlllib = $(libbase).lib
-
-onigheaders = oniguruma.h regint.h regparse.h
-posixheaders = onigposix.h
-headers = $(posixheaders) $(onigheaders)
-
-onigobjs = reggnu.obj regerror.obj regparse.obj regcomp.obj regexec.obj
-posixobjs = regposix.obj regposerr.obj
-libobjs = $(onigobjs) $(posixobjs)
-
-onigsources = regerror.c regparse.c regcomp.c regexec.c reggnu.c
-posixsources = regposix.c regposerr.c
-libsources = $(posixsources) $(onigsources)
-rubysources = regex.c $(onigsources)
-
-patchfiles = re.c.168.patch re.c.180.patch
-distfiles = README COPYING INSTALL-RUBY HISTORY \
- Makefile.in configure.in config.h.in configure \
- $(headers) $(libsources) regex.c $(patchfiles) \
- test.rb testconv.rb
-testc = testc
-testp = testp
-
-makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
-
-.SUFFIXES:
-.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
-
-.c.obj:
- $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /c $<
-
-# targets
-default: all
-
-all: $(libname) $(dllname)
-
-$(libname): $(libobjs)
- $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs)
-
-$(dllname): $(libobjs)
- $(ARDLL) $(libobjs) -Fe$@ $(ARDLL_FLAGS)
-
-regparse.obj: regparse.c $(onigheaders) config.h
-regcomp.obj: regcomp.c $(onigheaders) config.h
-regexec.obj: regexec.c regint.h oniguruma.h config.h
-reggnu.obj: reggnu.c regint.h oniguruma.h config.h
-regerror.obj: regerror.c regint.h oniguruma.h config.h
-regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h
-regposerr.obj: regposerr.c $(posixheaders) config.h
-
-# Ruby test
-rtest:
- $(RUBYDIR)\win32\ruby -w -Ke test.rb
-
-# C library test
-ctest: $(testc)
- .\$(testc)
-
-# POSIX C library test
-ptest: $(testp)
- .\$(testp)
-
-$(testc): $(testc).c $(libname)
- $(CC) -nologo -o $(testc) $(testc).c $(libname)
-
-$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST -DIMPORT -o $(testp) $(testc).c $(dlllib)
-
-clean:
- del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
-
-
-16: cpruby
- patch -d $(RUBYDIR) -p0 < re.c.168.patch
-
-18: cpruby
- patch -d $(RUBYDIR) -p0 < re.c.180.patch
-
-# backup file suffix
-SORIG = ruby_orig
-
-cpruby:
- $(CP) $(RUBYDIR)\regex.c $(RUBYDIR)\regex.c.$(SORIG)
- $(CP) $(RUBYDIR)\regex.h $(RUBYDIR)\regex.h.$(SORIG)
- $(CP) $(RUBYDIR)\re.c $(RUBYDIR)\re.c.$(SORIG)
-# $(rubysources)
- $(CP) regex.c $(RUBYDIR)
- $(CP) regerror.c $(RUBYDIR)
- $(CP) regparse.c $(RUBYDIR)
- $(CP) regcomp.c $(RUBYDIR)
- $(CP) regexec.c $(RUBYDIR)
- $(CP) reggnu.c $(RUBYDIR)
-# $(onigheaders)
- $(CP) oniguruma.h $(RUBYDIR)\regex.h
- $(CP) regint.h $(RUBYDIR)
- $(CP) regparse.h $(RUBYDIR)
-
-rback:
- $(CP) $(RUBYDIR)\regex.c.$(SORIG) $(RUBYDIR)\regex.c
- $(CP) $(RUBYDIR)\regex.h.$(SORIG) $(RUBYDIR)\regex.h
- $(CP) $(RUBYDIR)\re.c.$(SORIG) $(RUBYDIR)\re.c
-
-samples:
- $(CC) $(CFLAGS) -I. -DIMPORT -o simple sample\simple.c $(dlllib)
- $(CC) $(CFLAGS) -I. -DIMPORT -o posix sample\posix.c $(dlllib)
- $(CC) $(CFLAGS) -I. -DIMPORT -o names sample\names.c $(dlllib)
diff --git a/ext/mbstring/oniguruma/win32/config.h b/ext/mbstring/oniguruma/win32/config.h
deleted file mode 100644
index bdbdaf25c1..0000000000
--- a/ext/mbstring/oniguruma/win32/config.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 0
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 4
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define HAVE_STRUCT_STAT_ST_RDEV 1
-#define HAVE_ST_RDEV 1
-#define GETGROUPS_T int
-#define RETSIGTYPE void
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c
index cf9afa8082..b1543f2909 100644
--- a/ext/mbstring/php_mbregex.c
+++ b/ext/mbstring/php_mbregex.c
@@ -37,22 +37,22 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
/* {{{ static void php_mb_regex_free_cache() */
static void php_mb_regex_free_cache(php_mb_regex_t **pre)
{
- php_mb_regex_free(*pre);
+ onig_free(*pre); /* registered as the element destructor of the ht_rc pattern cache */
}
/* }}} */
/* {{{ _php_mb_regex_globals_ctor */
void _php_mb_regex_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
{
- MBSTRG(default_mbctype) = REGCODE_EUCJP;
- MBSTRG(current_mbctype) = REGCODE_EUCJP;
+ MBSTRG(default_mbctype) = ONIG_ENCODING_EUC_JP;
+ MBSTRG(current_mbctype) = ONIG_ENCODING_EUC_JP; /* starts out equal to the default encoding */
zend_hash_init(&(MBSTRG(ht_rc)), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
MBSTRG(search_str) = (zval*) NULL;
MBSTRG(search_re) = (php_mb_regex_t*)NULL;
MBSTRG(search_pos) = 0;
- MBSTRG(search_regs) = (php_mb_reg_region*)NULL;
- MBSTRG(regex_default_options) = RE_OPTION_POSIXLINE;
- MBSTRG(regex_default_syntax) = REG_SYNTAX_RUBY;
+ MBSTRG(search_regs) = (OnigRegion*)NULL;
+ MBSTRG(regex_default_options) = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; /* same pair the 'p' option letter selects */
+ MBSTRG(regex_default_syntax) = ONIG_SYNTAX_RUBY; /* same syntax the 'r' option letter selects */
}
/* }}} */
@@ -96,8 +96,8 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex)
MBSTRG(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) {
- php_mb_regex_region_free(MBSTRG(search_regs), 1);
- MBSTRG(search_regs) = (php_mb_reg_region *)NULL;
+ onig_region_free(MBSTRG(search_regs), 1);
+ MBSTRG(search_regs) = (OnigRegion *)NULL;
}
zend_hash_clean(&MBSTRG(ht_rc));
@@ -108,53 +108,152 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex)
/*
* encoding name resolver
*/
+
+/* {{{ encoding name map */
+typedef struct _php_mb_regex_enc_name_map_t {
+ const char *names; /* NUL-separated aliases, reported name first; an empty string ends the list */
+ OnigEncoding code; /* encoding every alias in names resolves to */
+} php_mb_regex_enc_name_map_t;
+
+php_mb_regex_enc_name_map_t enc_name_map[] ={
+ {
+ "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUC_JP\0EUCJP-WIN\0",
+ ONIG_ENCODING_EUC_JP
+ },
+ {
+ "UTF-8\0UTF8\0",
+ ONIG_ENCODING_UTF8
+ },
+ {
+ "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
+ ONIG_ENCODING_SJIS
+ },
+ {
+ "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
+ ONIG_ENCODING_BIG5
+ },
+ {
+ "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
+ ONIG_ENCODING_EUC_CN
+ },
+ {
+ "EUC-TW\0EUCTW\0EUC_TW\0",
+ ONIG_ENCODING_EUC_TW
+ },
+ {
+ "EUC-KR\0EUCKR\0EUC_KR\0",
+ ONIG_ENCODING_EUC_KR
+ },
+ {
+ "KOI8\0KOI-8\0",
+ ONIG_ENCODING_KOI8
+ },
+ {
+ "KOI8R\0KOI8-R\0KOI-8R\0",
+ ONIG_ENCODING_KOI8_R
+ },
+ {
+ "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
+ ONIG_ENCODING_ISO_8859_1
+ },
+ {
+ "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
+ ONIG_ENCODING_ISO_8859_2
+ },
+ {
+ "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
+ ONIG_ENCODING_ISO_8859_3
+ },
+ {
+ "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
+ ONIG_ENCODING_ISO_8859_4
+ },
+ {
+ "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
+ ONIG_ENCODING_ISO_8859_5
+ },
+ {
+ "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
+ ONIG_ENCODING_ISO_8859_6
+ },
+ {
+ "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
+ ONIG_ENCODING_ISO_8859_7
+ },
+ {
+ "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
+ ONIG_ENCODING_ISO_8859_8
+ },
+ {
+ "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
+ ONIG_ENCODING_ISO_8859_9
+ },
+ {
+ "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
+ ONIG_ENCODING_ISO_8859_10
+ },
+ {
+ "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
+ ONIG_ENCODING_ISO_8859_11
+ },
+ {
+ "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
+ ONIG_ENCODING_ISO_8859_13
+ },
+ {
+ "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
+ ONIG_ENCODING_ISO_8859_14
+ },
+ {
+ "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
+ ONIG_ENCODING_ISO_8859_15
+ },
+ {
+ "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
+ ONIG_ENCODING_ISO_8859_16
+ },
+ {
+ "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
+ ONIG_ENCODING_ASCII
+ },
+ { NULL, ONIG_ENCODING_UNDEF } /* sentinel: table walks stop at names == NULL */
+};
+/* }}} */
+
/* {{{ php_mb_regex_name2mbctype */
-php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname)
+OnigEncoding php_mb_regex_name2mbctype(const char *pname)
{
- php_mb_reg_char_encoding mbctype;
-
- mbctype = REGCODE_UNDEF;
- if (pname != NULL) {
- if (strcasecmp("EUC-JP", pname) == 0
- || strcasecmp("X-EUC-JP", pname) == 0
- || strcasecmp("UJIS", pname) == 0
- || strcasecmp("EUCJP", pname) == 0
- || strcasecmp("EUC_JP", pname) == 0
- || strcasecmp("EUCJP-WIN", pname) == 0) {
- mbctype = REGCODE_EUCJP;
- } else if (strcasecmp("UTF-8", pname) == 0
- || strcasecmp("UTF8", pname) == 0) {
- mbctype = REGCODE_UTF8;
- } else if (strcasecmp("SJIS", pname) == 0
- || strcasecmp("CP932", pname) == 0
- || strcasecmp("MS932", pname) == 0
- || strcasecmp("SHIFT_JIS", pname) == 0
- || strcasecmp("SJIS-WIN", pname) == 0) {
- mbctype = REGCODE_SJIS;
- } else if (strcasecmp("ASCII", pname) == 0) {
- mbctype = REGCODE_ASCII;
+ const char *p; /* cursor over the aliases of the current table entry */
+ php_mb_regex_enc_name_map_t *mapping;
+
+ if (pname == NULL) {
+ return ONIG_ENCODING_UNDEF; /* no name given: nothing to resolve */
+ }
+
+ for (mapping = enc_name_map; mapping->names != NULL; mapping++) { /* the table ends at the entry whose names is NULL */
+ for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) { /* step over each NUL-terminated alias; an empty string ends the list */
+ if (strcasecmp(p, pname) == 0) { /* aliases are compared case-insensitively */
+ return mapping->code;
+ }
}
}
- return mbctype;
+ return ONIG_ENCODING_UNDEF; /* no alias in any entry matched pname */
}
/* }}} */
/* {{{ php_mbregex_mbctype2name */
-const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype)
+const char *php_mb_regex_mbctype2name(OnigEncoding mbctype)
{
- const char *p = NULL;
-
- if (mbctype == REGCODE_EUCJP) {
- p = "EUC-JP";
- } else if(mbctype == REGCODE_UTF8) {
- p = "UTF-8";
- } else if(mbctype == REGCODE_SJIS) {
- p = "SJIS";
- } else if(mbctype == REGCODE_ASCII) {
- p = "ascii";
+ php_mb_regex_enc_name_map_t *mapping;
+
+ for (mapping = enc_name_map; mapping->names != NULL; mapping++) { /* linear scan up to the NULL-names sentinel */
+ if (mapping->code == mbctype) {
+ return mapping->names; /* read as a C string this is the first alias of the entry */
+ }
}
- return p;
+
+ return NULL; /* mbctype has no entry in enc_name_map */
}
/* }}} */
@@ -162,18 +261,18 @@ const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype)
* regex cache
*/
/* {{{ php_mbregex_compile_pattern */
-static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, php_mb_reg_option_type options, php_mb_reg_char_encoding enc, php_mb_reg_syntax_type *syntax TSRMLS_DC)
+static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
{
int err_code = 0;
int found = 0;
php_mb_regex_t *retval = NULL, **rc = NULL;
- php_mb_reg_error_info err_info;
- UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
+ OnigErrorInfo err_info; /* filled in by onig_new() when compilation fails */
+ UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
found = zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
- if ((err_code = php_mb_regex_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != REG_NORMAL) {
- php_mb_regex_error_code_to_str(err_str, err_code, err_info);
+ if ((err_code = onig_new(&retval, (UChar *)pattern, (UChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
+ onig_error_code_to_str(err_str, err_code, &err_info); /* variadic: the error info must be passed by address, not by value */
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
retval = NULL;
goto out;
@@ -188,14 +287,14 @@ out:
/* }}} */
/* {{{ _php_mb_regex_get_option_string */
-static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_option_type option, php_mb_reg_syntax_type *syntax)
+static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
{
size_t len_left = len;
size_t len_req = 0;
char *p = str;
char c;
- if ((option & RE_OPTION_IGNORECASE) != 0) {
+ if ((option & ONIG_OPTION_IGNORECASE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'i';
@@ -203,7 +302,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
- if ((option & RE_OPTION_EXTENDED) != 0) {
+ if ((option & ONIG_OPTION_EXTEND) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'x';
@@ -211,14 +310,15 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
- if ((option & RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
+ if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
+ (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
if (len_left > 0) {
--len_left;
*(p++) = 'p';
}
++len_req;
} else {
- if ((option & RE_OPTION_MULTILINE) != 0) {
+ if ((option & ONIG_OPTION_MULTILINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'm';
@@ -226,7 +326,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
- if ((option & RE_OPTION_SINGLELINE) != 0) {
+ if ((option & ONIG_OPTION_SINGLELINE) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 's';
@@ -234,14 +334,14 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
++len_req;
}
}
- if ((option & RE_OPTION_LONGEST) != 0) {
+ if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'l';
}
++len_req;
}
- if ((option & REG_OPTION_FIND_NOT_EMPTY) != 0) {
+ if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
if (len_left > 0) {
--len_left;
*(p++) = 'n';
@@ -251,21 +351,21 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
c = 0;
- if (syntax == REG_SYNTAX_JAVA) {
+ if (syntax == ONIG_SYNTAX_JAVA) {
c = 'j';
- } else if (syntax == REG_SYNTAX_GNU_REGEX) {
+ } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
c = 'u';
- } else if (syntax == REG_SYNTAX_GREP) {
+ } else if (syntax == ONIG_SYNTAX_GREP) {
c = 'g';
- } else if (syntax == REG_SYNTAX_EMACS) {
+ } else if (syntax == ONIG_SYNTAX_EMACS) {
c = 'c';
- } else if (syntax == REG_SYNTAX_RUBY) {
+ } else if (syntax == ONIG_SYNTAX_RUBY) {
c = 'r';
- } else if (syntax == REG_SYNTAX_PERL) {
+ } else if (syntax == ONIG_SYNTAX_PERL) {
c = 'z';
- } else if (syntax == REG_SYNTAX_POSIX_BASIC) {
+ } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
c = 'b';
- } else if (syntax == REG_SYNTAX_POSIX_EXTENDED) {
+ } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
c = 'd';
}
@@ -293,13 +393,13 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, php_mb_reg_
/* {{{ _php_mb_regex_init_options */
static void
-_php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *option, php_mb_reg_syntax_type **syntax, int *eval)
+_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
{
int n;
char c;
int optm = 0;
- *syntax = REG_SYNTAX_RUBY;
+ *syntax = ONIG_SYNTAX_RUBY;
if (parg != NULL) {
n = 0;
@@ -307,49 +407,49 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o
c = parg[n++];
switch (c) {
case 'i':
- optm |= RE_OPTION_IGNORECASE;
+ optm |= ONIG_OPTION_IGNORECASE;
break;
case 'x':
- optm |= RE_OPTION_EXTENDED;
+ optm |= ONIG_OPTION_EXTEND;
break;
case 'm':
- optm |= RE_OPTION_MULTILINE;
+ optm |= ONIG_OPTION_MULTILINE;
break;
case 's':
- optm |= RE_OPTION_SINGLELINE;
+ optm |= ONIG_OPTION_SINGLELINE;
break;
case 'p':
- optm |= RE_OPTION_POSIXLINE;
+ optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
break;
case 'l':
- optm |= RE_OPTION_LONGEST;
+ optm |= ONIG_OPTION_FIND_LONGEST;
break;
case 'n':
- optm |= REG_OPTION_FIND_NOT_EMPTY;
+ optm |= ONIG_OPTION_FIND_NOT_EMPTY;
break;
case 'j':
- *syntax = REG_SYNTAX_JAVA;
+ *syntax = ONIG_SYNTAX_JAVA;
break;
case 'u':
- *syntax = REG_SYNTAX_GNU_REGEX;
+ *syntax = ONIG_SYNTAX_GNU_REGEX;
break;
case 'g':
- *syntax = REG_SYNTAX_GREP;
+ *syntax = ONIG_SYNTAX_GREP;
break;
case 'c':
- *syntax = REG_SYNTAX_EMACS;
+ *syntax = ONIG_SYNTAX_EMACS;
break;
case 'r':
- *syntax = REG_SYNTAX_RUBY;
+ *syntax = ONIG_SYNTAX_RUBY;
break;
case 'z':
- *syntax = REG_SYNTAX_PERL;
+ *syntax = ONIG_SYNTAX_PERL;
break;
case 'b':
- *syntax = REG_SYNTAX_POSIX_BASIC;
+ *syntax = ONIG_SYNTAX_POSIX_BASIC;
break;
case 'd':
- *syntax = REG_SYNTAX_POSIX_EXTENDED;
+ *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
case 'e':
if (eval != NULL) *eval = 1;
@@ -372,7 +472,7 @@ _php_mb_regex_init_options(const char *parg, int narg, php_mb_reg_option_type *o
PHP_FUNCTION(mb_regex_encoding)
{
zval **arg1;
- php_mb_reg_char_encoding mbctype;
+ OnigEncoding mbctype;
if (ZEND_NUM_ARGS() == 0) {
const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
@@ -406,8 +506,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
char *string;
int string_len;
php_mb_regex_t *re;
- php_mb_reg_region *regs = NULL;
- int i, match_len, option, beg, end;
+ OnigRegion *regs = NULL;
+ int i, match_len, beg, end;
+ OnigOptionType options;
char *str;
array = NULL;
@@ -416,9 +517,9 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
RETURN_FALSE;
}
- option = MBSTRG(regex_default_options);
+ options = MBSTRG(regex_default_options);
if (icase) {
- option |= RE_OPTION_IGNORECASE;
+ options |= ONIG_OPTION_IGNORECASE;
}
/* compile the regular expression from the supplied regex */
@@ -433,16 +534,16 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
arg_pattern = &tmp;
/* don't bother doing an extended regex with just a number */
}
- re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), option, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC);
+ re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBSTRG(current_mbctype), MBSTRG(regex_default_syntax) TSRMLS_CC);
if (re == NULL) {
RETVAL_FALSE;
goto out;
}
- regs = php_mb_regex_region_new();
+ regs = onig_region_new();
/* actually execute the regular expression */
- if (php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) {
+ if (onig_search(re, (UChar *)string, (UChar *)(string + string_len), string, (UChar *)(string + string_len), regs, 0) < 0) {
RETVAL_FALSE;
goto out;
}
@@ -471,7 +572,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
RETVAL_LONG(match_len);
out:
if (regs != NULL) {
- php_mb_regex_region_free(regs, 1);
+ onig_region_free(regs, 1);
}
if (arg_pattern == &tmp) {
zval_dtor(&tmp);
@@ -496,7 +597,7 @@ PHP_FUNCTION(mb_eregi)
/* }}} */
/* {{{ _php_mb_regex_ereg_replace_exec */
-static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option)
+static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
{
zval *arg_pattern_zval;
@@ -511,8 +612,8 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
char *p;
php_mb_regex_t *re;
- php_mb_reg_syntax_type *syntax;
- php_mb_reg_region *regs = NULL;
+ OnigSyntaxType *syntax;
+ OnigRegion *regs = NULL;
smart_str out_buf = { 0 };
smart_str eval_buf = { 0 };
smart_str *pbuf;
@@ -547,9 +648,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
}
if (option_str != NULL) {
- _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, &eval);
+ _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
} else {
- option |= MBSTRG(regex_default_options);
+ options |= MBSTRG(regex_default_options);
syntax = MBSTRG(regex_default_syntax);
}
}
@@ -566,7 +667,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
arg_pattern_len = 1;
}
/* create regex pattern buffer */
- re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBSTRG(current_mbctype), syntax TSRMLS_CC);
+ re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBSTRG(current_mbctype), syntax TSRMLS_CC);
if (re == NULL) {
RETURN_FALSE;
}
@@ -583,12 +684,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
err = 0;
pos = string;
string_lim = (UChar*)(string + string_len);
- regs = php_mb_regex_region_new();
+ regs = onig_region_new();
while (err >= 0) {
- err = php_mb_regex_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0);
+ err = onig_search(re, (UChar *)string, (UChar *)string_lim, pos, (UChar *)string_lim, regs, 0);
if (err <= -2) {
- UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
- php_mb_regex_error_code_to_str(err_str, err);
+ UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
break;
}
@@ -651,14 +752,14 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int op
smart_str_appendl(&out_buf, pos, string_lim - pos);
}
}
- php_mb_regex_region_free(regs, 0);
+ onig_region_free(regs, 0);
}
if (description) {
efree(description);
}
if (regs != NULL) {
- php_mb_regex_region_free(regs, 1);
+ onig_region_free(regs, 1);
}
smart_str_free(&eval_buf);
@@ -684,7 +785,7 @@ PHP_FUNCTION(mb_ereg_replace)
Case insensitive replace regular expression for multibyte string */
PHP_FUNCTION(mb_eregi_replace)
{
- _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, RE_OPTION_IGNORECASE);
+ _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
}
/* }}} */
@@ -695,7 +796,7 @@ PHP_FUNCTION(mb_split)
char *arg_pattern;
int arg_pattern_len;
php_mb_regex_t *re;
- php_mb_reg_region *regs = NULL;
+ OnigRegion *regs = NULL;
char *string;
UChar *pos;
int string_len;
@@ -720,10 +821,10 @@ PHP_FUNCTION(mb_split)
pos = (UChar *)string;
err = 0;
- regs = php_mb_regex_region_new();
+ regs = onig_region_new();
/* churn through str, generating array entries as we go */
while ((--count != 0) &&
- (err = php_mb_regex_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) {
+ (err = onig_search(re, (UChar *)string, (UChar *)(string + string_len), pos, (UChar *)(string + string_len), regs, 0)) >= 0) {
if (regs->beg[0] == regs->end[0]) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
break;
@@ -744,15 +845,15 @@ PHP_FUNCTION(mb_split)
if (count < 0) {
count = 0;
}
- php_mb_regex_region_free(regs, 0);
+ onig_region_free(regs, 0);
}
- php_mb_regex_region_free(regs, 1);
+ onig_region_free(regs, 1);
/* see if we encountered an error */
if (err <= -2) {
- UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
- php_mb_regex_error_code_to_str(err_str, err);
+ UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
zval_dtor(return_value);
RETURN_FALSE;
@@ -779,7 +880,7 @@ PHP_FUNCTION(mb_ereg_match)
int string_len;
php_mb_regex_t *re;
- php_mb_reg_syntax_type *syntax;
+ OnigSyntaxType *syntax;
int option = 0, err;
{
@@ -805,7 +906,7 @@ PHP_FUNCTION(mb_ereg_match)
}
/* match */
- err = php_mb_regex_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0);
+ err = onig_match(re, (UChar *)string, (UChar *)(string + string_len), (UChar *)string, NULL, 0);
if (err >= 0) {
RETVAL_TRUE;
} else {
@@ -822,7 +923,7 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
zval **arg_pattern, **arg_options;
int n, i, err, pos, len, beg, end, option;
UChar *str;
- php_mb_reg_syntax_type *syntax;
+ OnigSyntaxType *syntax;
option = MBSTRG(regex_default_options);
switch (ZEND_NUM_ARGS()) {
@@ -873,17 +974,17 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
if (MBSTRG(search_regs)) {
- php_mb_regex_region_free(MBSTRG(search_regs), 1);
+ onig_region_free(MBSTRG(search_regs), 1);
}
- MBSTRG(search_regs) = php_mb_regex_region_new();
+ MBSTRG(search_regs) = onig_region_new();
- err = php_mb_regex_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0);
- if (err == REG_MISMATCH) {
+ err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len, MBSTRG(search_regs), 0);
+ if (err == ONIG_MISMATCH) {
MBSTRG(search_pos) = len;
RETVAL_FALSE;
} else if (err <= -2) {
- UChar err_str[REG_MAX_ERROR_MESSAGE_LEN];
- php_mb_regex_error_code_to_str(err_str, err);
+ UChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str(err_str, err);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
RETVAL_FALSE;
} else {
@@ -924,8 +1025,8 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
if (err < 0) {
- php_mb_regex_region_free(MBSTRG(search_regs), 1);
- MBSTRG(search_regs) = (php_mb_reg_region *)NULL;
+ onig_region_free(MBSTRG(search_regs), 1);
+ MBSTRG(search_regs) = (OnigRegion *)NULL;
}
}
/* }}} */
@@ -959,7 +1060,7 @@ PHP_FUNCTION(mb_ereg_search_regs)
PHP_FUNCTION(mb_ereg_search_init)
{
zval **arg_str, **arg_pattern, **arg_options;
- php_mb_reg_syntax_type *syntax = NULL;
+ OnigSyntaxType *syntax = NULL;
int option;
option = MBSTRG(regex_default_options);
@@ -1008,8 +1109,8 @@ PHP_FUNCTION(mb_ereg_search_init)
MBSTRG(search_pos) = 0;
if (MBSTRG(search_regs) != NULL) {
- php_mb_regex_region_free(MBSTRG(search_regs), 1);
- MBSTRG(search_regs) = (php_mb_reg_region *) NULL;
+ onig_region_free(MBSTRG(search_regs), 1);
+ MBSTRG(search_regs) = (OnigRegion *) NULL;
}
RETURN_TRUE;
@@ -1076,7 +1177,7 @@ PHP_FUNCTION(mb_ereg_search_setpos)
/* }}} */
/* {{{ php_mb_regex_set_options */
-void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC)
+void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
{
if (prev_options != NULL) {
*prev_options = MBSTRG(regex_default_options);
@@ -1093,8 +1194,8 @@ void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_
Set or get the default options for mbregex functions */
PHP_FUNCTION(mb_regex_set_options)
{
- php_mb_reg_option_type opt;
- php_mb_reg_syntax_type *syntax;
+ OnigOptionType opt;
+ OnigSyntaxType *syntax;
char *string = NULL;
int string_len;
char buf[16];
diff --git a/ext/mbstring/php_mbregex.h b/ext/mbstring/php_mbregex.h
index 86c9bf970a..7d99953a70 100644
--- a/ext/mbstring/php_mbregex.h
+++ b/ext/mbstring/php_mbregex.h
@@ -29,16 +29,16 @@
/* {{{ PHP_MBREGEX_GLOBALS */
#define PHP_MBREGEX_GLOBALS \
- php_mb_reg_char_encoding default_mbctype; \
- php_mb_reg_char_encoding current_mbctype; \
+ OnigEncoding default_mbctype; \
+ OnigEncoding current_mbctype; \
HashTable ht_rc; \
zval *search_str; \
zval *search_str_val; \
unsigned int search_pos; \
php_mb_regex_t *search_re; \
- struct php_mb_re_registers *search_regs; \
- int regex_default_options; \
- php_mb_reg_syntax_type *regex_default_syntax;
+ OnigRegion *search_regs; \
+ OnigOptionType regex_default_options; \
+ OnigSyntaxType *regex_default_syntax;
/* }}} */
/* {{{ PHP_MBREGEX_FUNCTION_ENTRIES */
@@ -83,10 +83,10 @@ PHP_MSHUTDOWN_FUNCTION(mb_regex);
PHP_RINIT_FUNCTION(mb_regex);
PHP_RSHUTDOWN_FUNCTION(mb_regex);
void _php_mb_regex_globals_ctor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
-void php_mb_regex_set_options(php_mb_reg_option_type options, php_mb_reg_syntax_type *syntax, php_mb_reg_option_type *prev_options, php_mb_reg_syntax_type **prev_syntax TSRMLS_DC);
+void php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC);
void _php_mb_regex_globals_dtor(zend_mbstring_globals_ptr pglobals TSRMLS_DC);
-php_mb_reg_char_encoding php_mb_regex_name2mbctype(const char *pname);
-const char *php_mb_regex_mbctype2name(php_mb_reg_char_encoding mbctype);
+OnigEncoding php_mb_regex_name2mbctype(const char *pname);
+const char *php_mb_regex_mbctype2name(OnigEncoding mbctype);
PHP_FUNCTION(mb_regex_encoding);
PHP_FUNCTION(mb_ereg);