summary refs log tree commit diff
path: root/ext/mbstring/oniguruma
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma')
-rw-r--r--ext/mbstring/oniguruma/AUTHORS1
-rw-r--r--ext/mbstring/oniguruma/COPYING32
-rw-r--r--ext/mbstring/oniguruma/HISTORY1838
-rw-r--r--ext/mbstring/oniguruma/README189
-rw-r--r--ext/mbstring/oniguruma/README.ja192
-rw-r--r--ext/mbstring/oniguruma/config.h.in108
-rw-r--r--ext/mbstring/oniguruma/doc/API585
-rw-r--r--ext/mbstring/oniguruma/doc/API.ja592
-rw-r--r--ext/mbstring/oniguruma/doc/FAQ37
-rw-r--r--ext/mbstring/oniguruma/doc/FAQ.ja122
-rw-r--r--ext/mbstring/oniguruma/doc/RE412
-rw-r--r--ext/mbstring/oniguruma/doc/RE.ja424
-rw-r--r--ext/mbstring/oniguruma/enc/ascii.c67
-rw-r--r--ext/mbstring/oniguruma/enc/big5.c168
-rw-r--r--ext/mbstring/oniguruma/enc/euc_jp.c228
-rw-r--r--ext/mbstring/oniguruma/enc/euc_kr.c173
-rw-r--r--ext/mbstring/oniguruma/enc/euc_tw.c144
-rw-r--r--ext/mbstring/oniguruma/enc/gb18030.c501
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_1.c151
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_10.c300
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_11.c105
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_13.c268
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_14.c298
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_15.c279
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_16.c292
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_2.c292
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_3.c281
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_4.c290
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_5.c296
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_6.c105
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_7.c278
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_8.c105
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_9.c270
-rw-r--r--ext/mbstring/oniguruma/enc/koi8.c264
-rw-r--r--ext/mbstring/oniguruma/enc/koi8_r.c266
-rw-r--r--ext/mbstring/oniguruma/enc/mktable.c1115
-rw-r--r--ext/mbstring/oniguruma/enc/sjis.c238
-rw-r--r--ext/mbstring/oniguruma/enc/unicode.c3403
-rw-r--r--ext/mbstring/oniguruma/enc/utf16_be.c232
-rw-r--r--ext/mbstring/oniguruma/enc/utf16_le.c230
-rw-r--r--ext/mbstring/oniguruma/enc/utf32_be.c187
-rw-r--r--ext/mbstring/oniguruma/enc/utf32_le.c185
-rw-r--r--ext/mbstring/oniguruma/enc/utf8.c3730
-rwxr-xr-xext/mbstring/oniguruma/index.html187
-rw-r--r--ext/mbstring/oniguruma/onigcmpt200.h310
-rw-r--r--ext/mbstring/oniguruma/oniggnu.h85
-rw-r--r--ext/mbstring/oniguruma/onigposix.h169
-rw-r--r--ext/mbstring/oniguruma/oniguruma.h905
-rw-r--r--ext/mbstring/oniguruma/regcomp.c6044
-rw-r--r--ext/mbstring/oniguruma/regenc.c1028
-rw-r--r--ext/mbstring/oniguruma/regenc.h147
-rw-r--r--ext/mbstring/oniguruma/regerror.c371
-rw-r--r--ext/mbstring/oniguruma/regexec.c3949
-rw-r--r--ext/mbstring/oniguruma/regext.c215
-rw-r--r--ext/mbstring/oniguruma/reggnu.c175
-rw-r--r--ext/mbstring/oniguruma/regint.h830
-rw-r--r--ext/mbstring/oniguruma/regparse.c5290
-rw-r--r--ext/mbstring/oniguruma/regparse.h328
-rw-r--r--ext/mbstring/oniguruma/regposerr.c90
-rw-r--r--ext/mbstring/oniguruma/regposix.c303
-rw-r--r--ext/mbstring/oniguruma/regsyntax.c236
-rw-r--r--ext/mbstring/oniguruma/regtrav.c76
-rw-r--r--ext/mbstring/oniguruma/regversion.c55
-rw-r--r--ext/mbstring/oniguruma/st.c589
-rw-r--r--ext/mbstring/oniguruma/st.h63
-rw-r--r--ext/mbstring/oniguruma/win32/config.h84
66 files changed, 40802 insertions, 0 deletions
diff --git a/ext/mbstring/oniguruma/AUTHORS b/ext/mbstring/oniguruma/AUTHORS
new file mode 100644
index 0000000..93167bd
--- /dev/null
+++ b/ext/mbstring/oniguruma/AUTHORS
@@ -0,0 +1 @@
+sndgk393 AT ybb DOT ne DOT jp (K.Kosako)
diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING
new file mode 100644
index 0000000..4d321bb
--- /dev/null
+++ b/ext/mbstring/oniguruma/COPYING
@@ -0,0 +1,32 @@
+Oniguruma LICENSE
+-----------------
+
+When this software is partly used or it is distributed with Ruby,
+this of Ruby follows the license of Ruby.
+It follows the BSD license in the case of the one except for it.
+
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
new file mode 100644
index 0000000..a1debef
--- /dev/null
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -0,0 +1,1838 @@
+History
+
+2007/08/16: Version 4.7.1
+
+2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/07/04: [spec] (thanks K.Takata)
+ ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl)
+2007/07/04: [dist] (thanks K.Takata)
+ fix documents API and API.ja.
+
+2007/06/18: Version 4.7.0
+
+2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/06/18: [bug] (thanks KUBO Takehiro)
+ WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint).
+2007/06/05: [impl] add #ifndef vsnprintf in regint.h.
+2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case
+ in onig_search().
+
+2007/04/12: Version 4.6.2
+
+2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000
+ to 0x4000.
+2007/03/26: [impl] add 'void' to function declarations.
+
+2007/03/06: Version 4.6.1
+
+2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/03/06: [bug] add #include <malloc.h> for bcc32.
+ (In bcc32, alloca() is declared in malloc.h.)
+2007/03/06: [impl] remove including version.h of Ruby.
+2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search().
+ ex. /\n\Z/.match("aaaaaaaaaa\n")
+2007/03/02: [impl] move range > start check position in end_buf process.
+
+2007/02/08: Version 4.6.0
+
+2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/01/09: [tune] select_opt_exact_info() didn't work for empty info.
+ ex. /.a/ make MAP info instead of EXACT info.
+2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode.
+2006/12/22: [spec] should check too short multibyte char in parse_exp().
+ add USE_PAD_TO_SHORT_BYTE_CHAR.
+ ex. /\x00/ in UTF16 should be error.
+
+2006/11/17: Version 4.5.1
+
+2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/15: [impl] remove CHECK_INTERRUPT.
+2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e
+ should be [:punct:].
+2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER.
+2006/11/07: [bug] (thanks Byte)
+ add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R.
+
+2006/11/06: Version 4.5.0
+
+2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND.
+2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of
+ the string range.
+ add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
+
+2006/10/30: Version 4.4.6
+
+2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/30: [impl] (thanks K.Takata)
+ add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END.
+2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner)
+ invalid offset value was used in STATE_CHECK_BUFF_INIT().
+
+2006/10/24: Version 4.4.5
+
+2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/24: [impl] escape -Wall warning.
+2006/10/24: [tune] (thanks Kornelius Kalnbach)
+ String#scan on a long string takes a long time compared with
+ old Ruby
+ because of the initialization time for the combination explosion check
+ ex. ("test " * 100_000).scan(/\w*\s?/)
+ change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000.
+ reduce initialization area of state_check_buff.
+2006/10/16: [bug] (thanks Akinori Musha)
+ first argument of rb_warn() should be format string.
+2006/10/10: [impl] add msa.state_check_buff_size initialization
+ in onig_search().
+2006/10/10: [bug] should call onig_st_free_table() in
+ onig_free_shared_cclass_table().
+2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB.
+2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT().
+ make valgrind happy.
+2006/09/22: [impl] convert to ascii for parameter string in
+ onig_error_code_to_str().
+ add enc member into OnigErrorInfo.
+
+2006/09/19: Version 4.4.4
+
+2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/19: [impl] (thanks KOYAMA Tetsuji)
+ HAVE_STDARG_PROTOTYPES was not defined in Mac OS X
+ by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc...
+
+2006/09/15: Version 4.4.3
+
+2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/15: [bug] (thanks Allan Odgaard)
+ out of range access in bm_search_notrev().
+ (p < s)
+
+2006/09/08: Version 4.4.2
+
+2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/08: [bug] (thanks K.Takata)
+ out of range access in bm_search_notrev().
+2006/09/04: [spec] (thanks K.Takata)
+ allow look-behind in negative look-behind.
+ ex. /(?<!(?<=a)b|c)d/
+
+2006/08/29: Version 4.4.1
+
+2006/08/29: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/29: [dist] (thanks Seiji Masugata)
+ add configure option --enable-combination-explosion-check
+
+2006/08/25: Version 4.4.0
+
+2006/08/25: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/25: [impl] add_state_check_num() should be enclosed in
+ ifdef USE_COMBINATION_EXPLOSION_CHECK.
+2006/08/23: [spec] config USE_COMBINATION_EXPLOSION_CHECK is enabled
+ in Ruby mode only.
+2006/08/22: [impl] remove last line comma in enum OpCode.
+2006/08/22: [impl] remove OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT and
+ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT.
+2006/08/22: [impl] remove OP_BACKREF3.
+
+2006/08/21: Version 4.3.1
+
+2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/21: [impl] change stack type values
+ and re-define STK_MASK_TO_VOID_TARGET etc...
+2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite.
+2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE.
+2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n}
+2006/08/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
+ if backreference is not used.
+2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern
+ in combination explosion check.
+
+2006/08/17: Version 4.3.0
+
+2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK.
+ check /(.+)*/, /(\s*foo\s*)*/ etc...
+ [API] add num_comb_exp_check member in regex_t.
+ [dist] change LTVERSION value to "1:0:0" in configure.in.
+2006/08/15: [bug] OP_REPEAT_INC process in match_at().
+ should check repeat-count >= range-upper and
+ range-upper may be infinite.
+
+2006/08/11: Version 4.2.3
+
+2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/10: [impl] remove double call in set_qualifier().
+2006/08/10: [impl] remove by_number member in QualifierNode.
+2006/08/09: [impl] remove a comma at the end of enum ReduceType
+ for escape warning on Mac OS X.
+2006/08/07: [impl] remove warning in regcomp.c.
+2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
+
+2006/08/03: Version 4.2.2
+
+2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/03: [bug] (thanks Hiroyuki Yamamoto)
+ segmentation fault in regexec(). (POSIX API)
+2006/08/02: [bug] combination of \G in look-ahead/look-behind and other
+ anchors(\A, \z, \Z) cause invalid result.
+ ex. /(?!\G)a\z/.match("ba")
+ start arg. of MATCH_ARG_INIT() should be original
+ arg. of onig_search().
+
+2006/07/31: Version 4.2.1
+
+2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/07/31: [bug] (thanks Kimura Minoru)
+ re-implement bm_search_notrev().
+2006/07/31: [impl] bm_search_notrev() refactoring.
+2006/07/31: [bug] (thanks Kimura Minoru)
+ fix incomplete multibyte string in exact info.
+2006/07/31: [impl] (thanks Seiji Masugata)
+ remove cast in va_init_list() for Intel C Compiler.
+
+2006/07/18: Version 4.2.0
+
+2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/18: [new] (thanks Wolfgang Nadasi-Donner)
+ add back reference with nest level.
+ \k<name+n>, \k<name-n>
+2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX
+ and ONIG_SYN_XXX number literals.
+
+2006/07/03: Version 4.1.2
+
+2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner)
+ allow \G in look-behind.
+ add ANCHOR_BEGIN_POSITION flag in setup_tree().
+2006/06/12: [impl] (thanks matz)
+ fix cast from char* to const char*
+ in onig_snprintf_with_pattern().
+ fix cast from char* to const char*
+ for PopularQStr[] and ReduceQStr[].
+
+2006/05/22: Version 4.1.1
+
+2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/22: [impl] add position string argument to STACK_BASE_CHECK().
+2006/05/22: [bug] (thanks NARUSE, Yui)
+ add STK_NULL_CHECK_END to IS_TO_VOID_TARGET().
+ ex. core dump in
+ /(?<pare>\(([^\(\)]++|\g<pare>)*+\))/.match('((a))')
+
+2006/05/15: Version 4.1.0
+
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [impl] thread atomic changes for onig_end() and
+ onig_free_node_list().
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [dist] update API, API.ja, FAQ, FAQ.ja.
+2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe()
+ and re_recompile_pattern().
+ add config USE_RECOMPILE_API.
+2006/05/15: [impl] improved thread safe implementation of onig_search()
+ and onig_match().
+
+2006/05/11: Version 4.0.4
+
+2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/11: [bug] (thanks Yuji Kaneda)
+ dead-lock in onig_end().
+2006/05/11: [dist] update index.html.
+
+2006/05/08: Version 4.0.3
+
+2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/08: [bug] (thanks Allan Odgaard)
+ Segmentation fault in backward search.
+ ex. /^\t.*$/
+2006/04/18: [dist] update index.html.
+2006/04/05: [dist] update index.html.
+2006/03/24: [dist] update doc/RE, doc/RE.ja.
+
+2006/03/23: Version 4.0.2
+
+2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP
+ and ONIG_OPTION_CAPTURE_GROUP check.
+2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS.
+2006/03/22: [impl] remove USE_NAMED_GROUP condition from
+ ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect().
+2006/03/22: [new] add API onig_noname_group_capture_is_active().
+2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType.
+ add typedef OnigRegexType regex_t
+ unless ONIG_ESCAPE_REGEX_T_COLLISION is defined.
+2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000
+ to 10000. (for docdiff program)
+2006/02/17: [dist] change COPYING year 2005 -> 2006.
+
+2006/02/07: Version 4.0.1
+
+2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table().
+2006/02/03: [ruby] add -m 0644 option to install command in "make 19".
+2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML.
+ change from IS_POSIXLINE() to IS_MULTILINE()
+ for ANCHOR_ANYCHAR_START/_ML decision
+ in optimize_node_left().
+2006/01/26: [dist] update index.html for Oniguruma 2.5.3.
+2006/01/25: [dist] update URL in index.html.
+
+2006/01/24: Version 4.0.0
+
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin].
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/01/24: [dist] remove warnings from sample/encode.c.
+2006/01/24: [dist] change install description in README(.ja).
+2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS.
+2006/01/24: [dist] --- support shared library ---
+ use GNU libtool/automake.
+ change configure.in and add Makefile.am, sample/Makefile.am.
+ add AUTHORS file.
+2006/01/24: [dist] test programs return exit code -1 when test fails.
+2006/01/24: [bug] (thanks KIMURA Koichi)
+ invalid syntax definition in ONIG_SYNTAX_GREP.
+ ONIG_SYN_OP_BRACE_INTERVAL
+ -> ONIG_SYN_OP_ESC_BRACE_INTERVAL
+2006/01/23: [dist] fix configure.in for onig-config.
+2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS.
+ (U+000d, U+0085, U+2028, U+2029)
+2005/12/29: [dist] change pmatch array size to 25 in testconv.rb.
+2005/12/26: [dist] fix name in test.rb.
+2005/12/26: [dist] update index.html for 2.5.1.
+
+2005/11/29: Version 3.9.1
+
+2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/21: [bug] (thanks Allan Odgaard)
+ utf-8 character comments in extended mode lead to
+ invalid result.
+ ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) /
+ fix onigenc_unicode_is_code_ctype() and
+ utf8_is_code_ctype().
+2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe)
+ begin-line anchor and BM search optimization lead to
+ invalid result in UTF-16/32.
+ fix in set_optimize_exact_info().
+
+2005/11/20: Version 3.9.0
+
+2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin].
+2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR.
+ (!!! NO SUPPORT experimental option !!!)
+2005/11/15: [bug] (thanks Allan Odgaard)
+ tok->escape was not cleared in fetch_token_in_cc().
+ ex. [\s&&[^\n]] makes wrong result.
+2005/10/18: [impl] (thanks nobu)
+ change sjis_mbc_enc_len()
+ and node_new_cclass_by_codepoint_range() scope to static.
+2005/09/05: [dist] remove link to MultiFind.
+2005/09/01: [dist] add link to yagrep.
+
+2005/08/23: Version 3.8.9
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [inst] fix Makefile.in for make ctest/ptest.
+
+2005/08/23: Version 3.8.8
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc().
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at().
+2005/08/23: [impl] (thanks akr)
+ add ONIG_OPTION_MAXBIT for escape conflict with
+ Ruby's option.
+2005/08/22: [impl] escape GCC 4.0 warnings for testc.c.
+2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840]
+ UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node().
+ abort on /\S*/ =~ "\xfe"
+2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c.
+2005/08/22: [impl] fix testconvu.rb.
+2005/08/22: [impl] escape GCC 4.0 warnings.
+
+2005/08/09: Version 3.8.7
+
+2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/09: [bug] (thanks Allan Odgaard)
+ should not call enc_len() for s == range
+ in onig_search().
+2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install.
+
+2005/07/27: Version 3.8.6
+
+2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux].
+2005/07/27: [impl] update onig-config.in.
+2005/07/26: [new] (thanks Yen-Ju Chen)
+ add Oniguruma configuration check program.
+ (onig-config.in)
+
+2005/07/14: Version 3.8.5
+
+2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux].
+2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux].
+2005/07/11: [bug] (thanks nobu) [ruby-dev:26505]
+ invalid handling for /\c\x/ and /\C-\x/.
+ fix fetch_escaped_value().
+2005/07/05: [impl] (thanks Alexey Zakhlestine)
+ escape GCC 4.0 warnings.
+
+2005/07/01: Version 3.8.4
+
+2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux].
+2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/30: [dist] add GB 18030 test to sample/encode.c.
+2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head().
+2005/06/30: [new] (contributed by KUBO Takehiro)
+ add new character encoding ONIG_ENCODING_GB18030.
+2005/06/30: [bug] invalid ctype check for multibyte encodings.
+ ("graph", "print")
+ fix onigenc_mb2/4_is_code_ctype(),
+ eucjp_is_code_ctype() and sjis_is_code_ctype().
+2005/06/30: [bug] invalid conversion from code point to mbc in
+ onigenc_mb4_code_to_mbc().
+
+2005/06/28: Version 3.8.3
+
+2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner)
+ invalid check for never ending recursion.
+ lower zero quantifier should be treated as
+ a non-recursive call alternative.
+ ex. /(?<bal>[^()]*(\(\g<bal>\)[^()]*)*)/
+2005/06/15: [impl] add divide_ambig_string_node_sub().
+2005/06/15: [dist] add a test to sample/encode.c.
+2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group)
+
+2005/06/01: Version 3.8.2
+
+2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja.
+2005/05/31: [impl] minor change in node_new().
+2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/30: [bug] (thanks Allan Odgaard)
+ FreeNodeList null check should be on thread-atomic
+ in node_new().
+
+2005/05/11: Version 3.8.1
+
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32].
+2005/05/11: [dist] update win32/Makefile (make 19).
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux].
+2005/05/06: [impl] (thanks nobu) [ruby-core:4815]
+ add #ifdef USE_VARIABLE_META_CHARS to goto label.
+2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux].
+2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION
+ to onig_rb_warn() and onig_rb_warning().
+
+2005/04/15: Version 3.8.0
+
+2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux].
+2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/04/01: [impl] (thanks Joe Orton)
+ (thanks Moriyoshi Koizumi)
+ many const-ification to many *.[ch] files.
+
+2005/03/25: Version 3.7.2
+
+2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/23: [new] add ONIG_SYNTAX_ASIS.
+2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
+2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API)
+2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/08: [impl] (thanks matz) [ruby-dev:25783]
+ should not allocate memory for key data in st.c.
+ move st_*_strend() functions from st.c. fixed some
+ potential memory leaks.
+ (imported from Ruby 1.9 2005-03-08)
+
+2005/03/07: Version 3.7.1
+
+2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux].
+2005/03/07: [impl] (thanks Rui Hirokawa)
+ add ONIG_ESCAPE_UCHAR_COLLISION.
+ rename UChar to OnigUChar in oniguruma.h.
+2005/03/07: [impl] remove declarations for Ruby in oniggnu.h.
+2005/03/05: [bug] ANCHOR_ANYCHAR_STAR didn't work in onig_search().
+2005/03/01: [dist] remove oniggnu.h from MANIFEST-RUBY.
+ remove oniggnu.h from make 19.
+2005/03/01: [bug] (thanks matz) [ruby-dev:25778]
+ uninitialized member (OptEnv.backrefed_status)
+ was used.
+
+2005/02/19: Version 3.7.0
+
+2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
+2005/02/19: [new] (thanks Minero Aoki)
+ add onig_region_set().
+2005/02/19: [API] change onig_region_init() to extern.
+2005/02/19: [dist] remove reggnu.c from MANIFEST-RUBY.
+ remove reggnu.c from make 19.
+2005/02/19: [dist] update doc/API and doc/API.ja.
+2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
+2005/02/19: [impl] (thanks Alexey Zakhlestine)
+ change UChar* to const UChar* in oniguruma.h,
+ regenc.h and regparse.h.
+2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
+ onigposix.h and st.h.
+2005/02/12: [test] success in ruby 1.9.0 (2005-02-11) [i386-cygwin].
+2005/02/12: [bug] (thanks nobu) [ruby-dev:25676]
+ type_cclass_hash() fix overrun.
+2005/02/09: [test] success in ruby 1.9.0 (2005-02-09) [i686-linux].
+2005/02/09: [spec] add RE_OPTION_FIND_NOT_EMPTY etc.. to oniggnu.h.
+2005/02/09: [dist] remove hash.c.patch.
+2005/02/07: [impl] remove re_mbctab, mbctab_ascii etc...
+ (USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY)
+
+2005/02/04: Version 3.6.0
+
+2005/02/04: [test] success in ruby 1.9.0 (2005-02-04) [i686-linux].
+2005/02/01: [bug] add key_free() call to st_free_table().
+2005/02/01: [new] add onig_get_default_ambig_flag() and
+ onig_set_default_ambig_flag().
+2005/02/01: [dist] update MANIFEST-RUBY.
+2005/01/31: [test] success in ruby 1.9.0 (2005-01-29) [i686-linux].
+2005/01/31: [spec] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND
+ from ONIGENC_AMBIGUOUS_MATCH_DEFAULT.
+2005/01/31: [dist] update Makefile.in (make 19).
+2005/01/29: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.5.4 was merged to Ruby 1.9.0.
+2005/01/28: [impl] (thanks UK-taniyama)
+ add extern "C" { } directive to oniguruma.h, oniggnu.h
+ and onigposix.h for C++.
+2005/01/25: [impl] remove nested function call for xxx_code_to_mbclen().
+ (euc_kr.c, euc_tw.c, big5.c)
+
+2005/01/19: Version 3.5.4
+
+2005/01/19: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/19: [bug] (thanks Isao Sonobe)
+ callback function argument name_end of onig_foreach_name()
+ was wrong.
+ name key of name table should be null terminated for
+ character encoding length.
+ add strdup_with_null(), rename onig_strdup() to k_strdup().
+ use e->name_len in i_names().
+2005/01/17: [impl] (thanks UK-taniyama)
+ add HAVE_SYS_TYPES_H to config.h.in.
+
+2005/01/13: Version 3.5.3
+
+2005/01/13: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/13: [bug] ignore case match bug.
+ ex. /s+/iu.match("SSSSS") ==> [4..5]
+ fix OP_EXACT1_IC, OP_EXACTN_IC process.
+2005/01/13: [bug] (thanks Isao Sonobe)
+ ignore case match bug.
+ ex. /is/iu.match("ss") fail.
+ fix str_lower_case_match() etc.
+
+2005/01/05: Version 3.5.2
+
+2005/01/05: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/05: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
+2005/01/05: [bug] (thanks Isao Sonobe)
+ ignore case match bug.
+ ex. /s+/iu.match("sssss") ==> [4..5]
+ fix OP_EXACT1_IC, OP_EXACTN_IC process.
+2005/01/05: [bug] (thanks Isao Sonobe)
+ group name table should be renumbered.
+ add onig_renumber_name_table().
+2004/12/24: [dist] remove file onigcmpt200.h.
+
+2004/12/17: Version 3.5.1
+
+2004/12/17: [dist] add INSTALL-RUBY to archive.
+2004/12/16: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
+2004/12/16: [dist] update hash.c.patch.
+2004/12/15: [bug] (thanks matz)
+ char > 127 should be casted to unsigned char. (utf8.c)
+2004/12/13: [impl] add HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES definition
+ to oniguruma.h in the case __cplusplus.
+2004/12/06: [dist] update doc/RE and doc/RE.ja.
+2004/12/03: [impl] (thanks nobu)
+ st.h fix prototype for C++.
+
+2004/12/03: Version 3.5.0
+
+2004/12/02: [test] success in ruby 1.9.0 (2004-12-02) [i686-linux].
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-mswin32].
+2004/12/01: [dist] add make targets 19 and 19up to win32/Makefile.
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-cygwin].
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i686-linux].
+2004/12/01: [impl] double cast for escape warning in Cygwin.
+ (HashDataType* )((void* )(&e)) in regparse.c
+2004/12/01: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
+2004/12/01: [tune] change implementation of clear_opt_map_info().
+ (which was 10-16% cost in gprof result for my test program)
+2004/12/01: [dist] remove regex.c from distribution files.
+2004/11/30: [memo] remove targets 16 and 18 from Makefile.in.
+2004/11/30: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
+2004/11/30: [inst] add "cp -p st.[ch] st.[ch].ruby_orig" to "make 19".
+2004/11/30: [tune] map_position_value() return 20 if code is 0
+ and minimum enclen > 1.
+2004/11/30: [test] success in ruby 1.9.0 (2004-11-29) [i686-linux].
+2004/11/30: [impl] minor changes for multi-thread in regexec.c and regcomp.c.
+2004/11/30: [impl] change THREAD_PASS_LIMIT_COUNT value from 10 to 8.
+2004/11/30: [impl] add THREAD_ATOMIC_XXX to FreeNodeList access in regparse.c
+2004/11/29: [impl] add USE_MULTI_THREAD_SYSTEM.
+2004/11/29: [memo] add hash.c.patch to CVS.
+2004/11/29: [dist] change mail address to 'sndgk393 AT ...'
+2004/11/29: [dist] add -s option (silent mode) to test.rb.
+2004/11/29: [tune] change THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS value
+ from 20 to 8.
+2004/11/29: [inst] add make target "19up".
+2004/11/29: [dist] change Oniguruma Home Page URL.
+2004/11/29: [impl] remove onig_is_in_code_range_array().
+2004/11/29: [dist] fix doc/RE and RE.ja (character types).
+2004/11/26: [dist] fix win32/Makefile.
+2004/11/26: [dist] fix doc/RE and RE.ja (multibyte character types).
+2004/11/26: [impl] add onig_free_shared_cclass_table().
+2004/11/26: [impl] move definition USE_UNICODE_FULL_RANGE_CTYPE to regenc.h.
+2004/11/26: [impl] add opcode OP_CCLASS_NODE.
+2004/11/26: [impl] move definition of CClassNode to regint.h.
+2004/11/26: [impl] add type PointerType in regint.h.
+2004/11/25: [impl] remove ONIGENC_CTYPE_MOD_NOT.
+2004/11/25: [impl] rename onig_node_new_cclass_by_codepoint_range to
+ node_new_cclass_by_codepoint_range.
+2004/11/25: [impl] remove get_type_cc_node method from OnigEncodingType.
+2004/11/25: [impl] move implementation of shared char-class from enc/*.c
+ to regparse.c.
+2004/11/25: [dist] add hash.c.patch for Ruby 1.9 hash.c change.
+2004/11/22: [impl] change utf8_get_type_node().
+2004/11/22: [impl] add ONIGENC_CTYPE_MOD_NOT.
+2004/11/22: [bug] (thanks MIYAMUKO Katsuyuki)
+ ruby make test fails in HP-UX B.11.23 ia64.
+ should use tok->u.code instead of tok->u.c in
+ the case of TK_CODE_POINT.
+2004/11/19: [bug] (thanks Yoshida Masato)
+ invalid multibyte code causes segmentation fault.
+ ex. /[\xFF-\xFF]/u
+2004/11/19: [bug] (thanks Yoshida Masato)
+ illegal check in char-class range in UTF-8.
+ ex. s = "[\xC2\xA0-\xC3\xBE]"
+ p(Regexp.new(s, nil, "u") =~ "\xC3\xBE")
+2004/11/18: [impl] add onig_node_new_cclass_by_codepoint_range().
+2004/11/18: [impl] remove OnigCodePointRange type. (use OnigCodePoint[].)
+2004/11/17: [bug] (thanks nobu)
+ abort in "a".gsub(/a\Z/, "")
+ fix ONIGENC_STEP_BACK() argument in onig_search().
+2004/11/16: [impl] add key2 member to st_table_entry in st.[ch].
+ change API of st for non-null terminated string key.
+2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType.
+2004/11/15: [impl] add st.h and st.c from Ruby 1.9.
+ use st-hash always.
+2004/11/12: [impl] change member 'not' of CClassNode to 'flags'.
+ add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE.
+2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c.
+2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c.
+2004/11/11: [bug] fix enc/utf8.c.
+ size 0 array initializer was compile error in VC++.
+2004/11/09: [inst] (thanks Hiroki YAGITA)
+ change installed file mode to 0644.
+2004/11/09: [bug] (thanks UK-taniyama)
+ wrong definitions GET_RELADDR_INC(), GET_ABSADDR_INC()
+ etc... (NOT PLATFORM_UNALIGNED_WORD_ACCESS)
+2004/11/09: [impl] type cast in regexec() to remove compile time warning.
+ (WIN32, regposix.c)
+2004/11/08: [spec] fix Unicode character types.
+ 0x00ad (soft hyphen) should be [:cntrl:] and [:space:] type.
+ [0x0009..0x000d], 0x0085 should be [:print:] type.
+ 0x00ad should not be [:punct:] type.
+2004/11/08: [inst] fix Makefile.in. (for make ctest/ptest/testcu)
+2004/11/06: [impl] (thanks Kazuo Saito)
+ too many alternatives pattern causes core dump.
+ change implementation of onig_node_free().
+2004/11/05: [spec] rename ONIGERR_END_PATTERN_AT_BACKSLASH to
+ ONIGERR_END_PATTERN_AT_ESCAPE.
+2004/11/05: [impl] (thanks matz)
+ escape compile time warnings for x86-64 Linux.
+ StackIndex type int -> long
+2004/11/05: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.4.0 was merged to Ruby 1.9.0.
+
+2004/10/30: Version 3.4.0
+
+2004/10/30: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
+2004/10/30: [new] add hexadecimal digit char type. (\h, \H)
+ syntax: ONIG_SYN_OP2_ESC_H_XDIGIT
+2004/10/30: [bug] (thanks Guy Decoux)
+ reluctant infinite repeat bug.
+ ex. /^[a-z]{2,}?$/.match("aaa") fail.
+ fix OP_REPEAT_INC_NG process in match_at().
+
+2004/10/18: Version 3.3.1
+
+2004/10/18: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
+2004/10/18: [impl] (thanks Imai Yasumasa)
+ enclose #include <sys/types.h> by #ifndef __BORLANDC__.
+2004/10/18: [bug] (thanks Imai Yasumasa)
+ memory access violation in select_opt_exact_info().
+2004/09/25: [dist] fix doc/API and doc/API.ja.
+2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for
+ the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ is not defined.
+
+2004/09/17: Version 3.3.0
+
+2004/09/17: [dist] add COPYING to program source files.
+2004/09/17: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/09/17: [bug] (thanks Isao Sonobe)
+ memory access violations in xxx_mbc_enc_len(),
+ and xxx_mbc_to_normalize() and
+ xxx_left_adjust_char_head().
+ add string range check in match_at() and onig_search().
+2004/09/08: [dist] change mail address format.(kosako AT sofnec ...)
+
+2004/09/04: Version 3.2.9
+
+2004/09/04: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/09/04: [bug] (thanks Bob Kerstetter and Richard Koch)
+ search fail in ignore case mode.
+ fix str_lower_case_match().
+2004/09/04: [inst] (thanks Isao Sonobe)
+ clear sample directory in 'make clean'.
+2004/09/04: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
+ meanings in XXXXX_mbc_to_normalize() and
+ XXXXX_is_mbc_ambiguous().
+2004/08/28: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
+ meanings in iso_8859_XX_mbc_to_normalize() and
+ iso_8859_XX_is_mbc_ambiguous().
+
+2004/08/24: Version 3.2.8
+
+2004/08/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/08/24: [spec] add ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY.
+ /a{n}?/ == /(?:a{n})?/
+2004/08/24: [dist] fix doc/RE and doc/RE.ja.
+2004/08/24: [bug] (thanks starfish)
+ memory leak in set_optimize_exact_info().
+
+2004/08/21: Version 3.2.7
+
+2004/08/21: [test] success in ruby 1.8.2 (2004-07-28) [i686-linux].
+ (1.8.2 preview2)
+2004/08/21: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/08/21: [bug] (thanks Isao Sonobe) (thanks kage)
+ memory access violation in bm_search_notrev().
+ (forgotten to merge from 2.X)
+
+2004/07/24: Version 3.2.6
+
+2004/07/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/07/24: [test] success in ruby 1.8.2 (2004-07-16) [i686-linux].
+2004/07/24: [bug] fix warnings for regexec.c. (gcc 2.91.66)
+2004/07/24: [memo] change version control system from Subversion
+ to CVS 1.11.17.
+2004/07/20: [bug] (thanks Isao Sonobe)
+ illegal result in negative character class in ignore case
+ mode. fix pair-ambig-codes process in parse_exp().
+ ex. /[^a]/i.match("A")
+2004/07/20: [bug] (thanks Isao Sonobe)
+ undefined bytecode error happens in UTF-16BE etc..
+ compile_length_cclass_node() was not consistent with
+ compile_cclass_node().
+
+2004/07/01: Version 3.2.5
+
+2004/07/01: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/07/01: [new] add onig_get_syntax_{op,op2,behavior,options}.
+2004/07/01: [bug] (thanks Isao Sonobe)
+ invalid result in onig_capture_tree_traverse().
+ fix make_capture_history_tree().
+
+2004/06/29: Version 3.2.4
+
+2004/06/29: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/29: [new] (thanks Isao Sonobe)
+ add onig_number_of_captures().
+
+2004/06/25: Version 3.2.3
+
+2004/06/25: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/25: [bug] (thanks Isao Sonobe)
+ invalid result in onig_capture_tree_traverse().
+ fix make_capture_history_tree().
+
+2004/06/24: Version 3.2.2
+
+2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/06/24: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/24: [new] (thanks Isao Sonobe)
+ add onig_number_of_capture_histories().
+2004/06/24: [bug] (thanks Isao Sonobe)
+ invalid char position match in UTF-16 and UTF-32.
+ add onigenc_always_false_is_allowed_reverse_match().
+
+2004/06/17: Version 3.2.1
+
+2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/06/17: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/06/17: [impl] should not use OP_REPEAT for (...)? even if target size
+ is long.
+2004/06/17: [bug] (thanks nobu) [ruby-dev:23703]
+ should use STACK_AT() instead of stkp in OP_REPEAT_INC.
+ add IN_VAR_REPEAT flag in setup_tree().
+2004/06/16: [impl] change select_opt_exact_info() to use ByteValTable[].
+2004/06/16: [impl] change map_position_value() table values.
+2004/06/14: [impl] (thanks John Carter)
+ RelAddrType, AbsAddrType and LengthType change
+ from short int to int type for the very long string match.
+2004/06/14: [bug] (thanks Greg A. Woods)
+ fix nmatch argument of regexec() is smaller than
+ reg->num_mem + 1 case. (POSIX API)
+2004/06/14: [spec] (thanks Greg A. Woods)
+ set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API)
+
+2004/06/10: Version 3.2.0
+
+2004/06/10: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/10: [test] success in ruby 1.9.0 (2004-05-27) [i386-mswin32].
+2004/06/10: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/06/10: [dist] add README.ja.
+2004/06/10: [new] add onig_copy_encoding().
+2004/06/10: [API] add encoding argument to onig_set_meta_char().
+ add meta_char_table member to OnigEncodingType.
+2004/06/08: [dist] add doc/API.ja.
+2004/06/07: [API] add num_of_elements member to OnigCompileInfo.
+2004/05/29: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.1.0 was merged to Ruby 1.9.0.
+2004/05/26: [impl] rename NST_SIMPLE_REPEAT to NST_STOP_BT_SIMPLE_REPEAT.
+2004/05/26: [impl] doesn't need to check that target's simple repeat-ness
+ for EFFECT_MEMORY type node in setup_tree().
+
+2004/05/25: Version 3.1.0
+
+2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/05/25: [test] success in ruby 1.9.0 (2004-05-23) [i686-linux].
+2004/05/25: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/05/25: [bug] (thanks Masahiro Sakai) [ruby-dev:23560]
+ ruby -ruri -ve 'URI::ABS_URI =~
+ "http://example.org/Andr\xC3\xA9"'
+ nested STK_REPEAT type stack can't backtrack repeat_stk[].
+ add OP_REPEAT_INC_SG and OP_REPEAT_INC_NG_SG.
+2004/05/25: [new] support UTF-32LE. (ONIG_ENCODING_UTF32_LE)
+2004/05/25: [new] support UTF-32BE. (ONIG_ENCODING_UTF32_BE)
+2004/05/24: [impl] divide enc/utf16.c to utf16_be.c and utf16_le.c.
+2004/05/24: [impl] add enc/unicode.c.
+2004/05/24: [API] change calling sequences of onig_new_deluxe() and
+ onig_recompile_deluxe().
+ define OnigCompileInfo type.
+2004/05/21: [impl] perform ensure process for rb_trap_exec() in match_at().
+ add onig_exec_trap() and CHECK_INTERRUPT_IN_MATCH_AT.
+2004/05/21: [impl] add regex status check to onig_match().
+2004/05/21: [new] add onig_get_capture_tree() and
+ onig_capture_tree_traverse().
+2004/05/20: [spec] (thanks Isao Sonobe)
+ capture history return capture data tree.
+ (see sample/listcap.c)
+2004/05/19: [bug] (thanks Simon Strandgaard)
+ Control-C does not work in matching process on Ruby.
+ add calling of CHECK_INTERRUPT into match_at().
+ ex. /<(?:[^">]+|"[^"]*")+>/.match('<META http-equiv= \
+ "Content-Type content="text/html; charset=iso-8859-1">')
+2004/05/19: [bug] (thanks Simon Strandgaard)
+ define virtual codepoint values for invalid encoding
+ byte 0xfe and 0xff in UTF-8.
+ ex. /\w+/u.match("%a\xffb\xfec%") ==> "a"
+2004/05/19: [spec] (thanks Simon Strandgaard)
+ too big backref number should be treated as a sequence of
+ an octal char and number digits.
+ ex. /b\3777\c/.match("b\3777\c")
+2004/05/17: [spec] rename encoding names "UTF-16 BE" and "UTF-16 LE"
+ to "UTF-16BE" and "UTF-16LE".
+2004/05/17: [impl] move ismbchar() and mbclen() from oniguruma.h to oniggnu.h.
+2004/05/17: [impl] rename onigenc_single_byte_is_allowed_reverse_match() to
+ onigenc_always_true_is_allowed_reverse_match().
+
+2004/05/14: Version 3.0.0
+
+2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/05/14: [test] success in ruby 1.9.0 (2004-05-14) [i686-linux].
+2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+ (* need to edit parse.y:
+ register int c; ---> int c; in yylex())
+2004/05/14: [impl] add regext.c.
+2004/05/14: [spec] KOI8 is not included in library archive by default setup.
+2004/05/14: [impl] implementation changes are completed for all encoding files.
+2004/05/12: [impl] add divide_ambig_string_node().
+ ambiguous string is divided and normalized before
+ optimization and compilation process.
+2004/05/11: [dist] remove INSTALL-RUBY from distribution.
+2004/04/28: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.8 was merged to Ruby 1.9.0.
+2004/04/26: [spec] change value DEFAULT_MATCH_STACK_LIMIT_SIZE = 0 : unlimited
+2004/04/26: [new] add onig_get_match_stack_limit_size() and
+ onig_set_match_stack_limit_size().
+2004/04/26: [bug] add error check to re.c.181.patch and re.c.168.patch.
+2004/04/23: [impl] remove ctype_support_level from OnigEncodingType.
+2004/04/22: [spec] allow the range from single byte char to multibyte char in
+ character class for implementation reason.
+ ex. /[a-\xbb\xcc]/ in EUC-JP encoding.
+2004/04/21: [impl] remove max_enc_len_by_first_byte() from OnigEncodingType.
+2004/04/20: [new] add onig_copyright().
+2004/04/20: [impl] add regversion.c.
+2004/04/15: [new] add onig_get_ambig_flag().
+2004/04/14: [bug] (thanks Isao Sonobe)
+ undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST
+ is set.
+ should finish matching process if find-condition
+ fails at OP_END in match_at().
+2004/04/12: [impl] add ambig_flag to regex_t.
+2004/04/09: [impl] move onig_set_meta_char() to regsyntax.c.
+2004/04/09: [bug] (thanks HIROSE Masaaki) fix onig_version().
+2004/04/08: [impl] add regsyntax.c.
+2004/04/07: [new] support UTF-16 LE. (ONIG_ENCODING_UTF16_LE)
+2004/04/05: [impl] add ONIGENC_CTYPE_NEWLINE.
+2004/04/05: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.6 was merged to Ruby 1.9.0.
+2004/04/02: [memo] Version 2.2.6 was released.
+2004/03/26: [new] support UTF-16 BE. (ONIG_ENCODING_UTF16_BE)
+2004/03/25: [spec] support non 8-bit encodings.
+2004/03/16: [memo] 2.X branch for 8-bit encodings only.
+
+2004/03/16: Version 2.2.5
+
+2004/03/16: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/03/16: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
+2004/03/16: [impl] add property name to error message of
+ ONIGERR_INVALID_CHAR_PROPERTY_NAME.
+2004/03/16: [spec] allow prefix 'Is' for \p{...} in ONIG_SYNTAX_PERL.
+ add syntax op. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS.
+2004/03/15: [dist] add sample/syntax.c.
+2004/03/15: [spec] support NOT op. in char property. \p{^...}, \P{^...}.
+ add syntax op. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT.
+2004/03/15: [spec] rename ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY to
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY.
+2004/03/10: [impl] move ONIGERR_XXX from regenc.h to oniguruma.h,
+ rename ONIGERR_XXX to ONIGENCERR_XXX in regenc.h.
+2004/03/08: [impl] (thanks eban)
+ replace defined(__CYGWIN__) to defined(__GNUC__).
+2004/03/08: [bug] (thanks eban) [ruby-dev:23172]
+ need to separate initialization for bcc32.
+2004/03/06: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.4 was merged to Ruby 1.9.0.
+2004/03/05: [API] change second argument type of onig_set_meta_char()
+ from unsigned int to OnigCodePoint.
+2004/03/05: [dist] (thanks Kazuo Saito)
+ add MANIFEST-RUBY.
+
+2004/03/04: Version 2.2.4
+
+2004/03/04: [impl] (thanks Moriyoshi Koizumi)
+ fix many warnings in Win32 VC++ with /W3 option.
+
+2004/03/02: Version 2.2.3
+
+2004/03/02: [bug] (thanks Isao Sonobe)
+ return invalid capture region value if capture history
+ is used. (OP_MEMORY_END_PUSH_REC bug)
+ ex. /\g<p>(?@<p>\(\g<s>\)){0}(?<s>(?:\g<p>)*|){0}/
+ .match("((())())")
+2004/03/02: [impl] (thanks Kazuo Saito)
+ add :nodoc: to onig_stat_print() for RDoc.
+2004/03/02: [impl] don't use ONIG_SOURCE_IS_WRAPPED.
+
+2004/02/27: Version 2.2.2
+
+2004/02/27: [impl] fix the position of onig_stat_print().
+2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h
+ for ignored by RDoc.
+
+2004/02/26: Version 2.2.1
+
+2004/02/26: [bug] [bugs.php.net:#26677] (thanks behrens)
+ invalid definition at onig_error_code_to_str()
+ in the case of NOT HAVE_STDARG_PROTOTYPES.
+
+2004/02/25: Version 2.2.0
+
+2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
+2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() was used in
+ onigenc_is_code_ctype() in the case of Ruby M17N.
+2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16)
+2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c.
+2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14)
+2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13)
+2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10)
+2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig().
+2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9)
+2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R.
+2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex.
+2004/02/17: [spec] check capture status for empty infinite loop.
+ [ruby-dev:20224] etc...
+ ex. /(?:\1a|())*/.match("a"),
+ /(?:()|()|()|(x)|()|())*\2b\5/.match("b")
+ add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK.
+ add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH.
+ add stack type STK_NULL_CHECK_END.
+2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c.
+2004/02/13: [bug] (thanks Simon Strandgaard)
+ parsing of nested repeat was invalid.
+ ex. /ab{2,3}*/ was /(?:a(?:b{2,3}))*/,
+ should be /a(?:b{2,3}*)/
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ OP_REPEAT_INC_NG process in match_at() is wrong.
+ ex. bad match /a.{0,2}?a/ =~ "0aXXXa0"
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ wrong fetch after (?x) option. ex. "(?x)\ta .\n+b"
+2004/02/12: [bug] (thanks Simon Strandgaard)
+ [\^] is not an empty char class.
+2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(),
+ onig_set_syntax_behavior(), onig_set_syntax_options().
+2004/02/06: [dist] add a new target 'site' to Makefile.in.
+2004/02/06: [dist] add index.html.
+2004/02/03: [bug] oniggnu.h was not installed by 'make install'.
+
+2004/02/02: Version 2.1.0
+
+2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux].
+2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11)
+2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5)
+2004/02/02: [impl] should check single byte encoding or not in and_cclass()
+ and or_cclass().
+2004/01/30: [dist] add oniggnu.h.
+2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type.
+2004/01/30: [new] support ISO-8859-8. (ONIG_ENCODING_ISO_8859_8)
+2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7)
+2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6)
+2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R)
+2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8)
+2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c.
+2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4)
+2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3)
+2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong.
+ (0x80 - 0xff is not ASCII)
+2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2)
+2004/01/23: [dist] add enc/isotable.c.
+2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
+2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and
+ ALIGNMENT_RIGHT() was wrong.
+ type casting should be unsigned int, not int.
+2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__)
+ to unaligned word access condition. (AMD64 ?)
+2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c.
+2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
+2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/01/20: [dist] change Makefile.in.
+2004/01/20: [spec] add \p{...}, \P{...} in char class.
+2004/01/20: [new] character property operators \p{...}, \P{...}.
+ supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL.
+2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/)
+2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h.
+2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch.
+2004/01/16: [dist] update sample/*.c for new API.
+2004/01/16: [dist] add onigcomp200.h. (for old API compatibility)
+2004/01/16: [dist] update documents API, RE and RE.ja.
+2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ -> onig_,
+ ENC_ -> ONIGENC_, enc_ -> onigenc_.
+2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD().
+ rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to
+ ENC_CTYPE_SUPPORT_LEVEL_SB.
+2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to
+ PLATFORM_UNALIGNED_WORD_ACCESS.
+2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000.
+2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h.
+ remove code_to_mbc_first member in RegCharEncodingType.
+2004/01/13: [impl] remove head byte bitset information in cclass->mbuf.
+2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for
+ escape conflict.
+
+2003/12/24: Version 2.0.0
+
+2003/12/24: [spec] ignore case option is effective to numbered char.
+ ex. /\x61/i =~ "A"
+2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux].
+2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2003/12/24: [test] success in regex.c compile test on ruby-m17n.
+ (but can't make miniruby because re.c patch fail.)
+2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5.
+2003/12/22: [spec] implement fold match on UTF-8 encoding.
+2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to
+ RegCharEncoding type.
+2003/12/19: [impl] add add_ctype_to_cc() in regparse.c.
+2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case.
+2003/12/19: [impl] change ENC_CODE_TO_MBC() interface.
+2003/12/18: [new] implement fold match. (variable number of char
+ match in ignore case mode.)
+ ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss".
+2003/12/17: [impl] refactoring of encoding system.
+2003/12/17: [impl] add enc_init() in regenc.c.
+2003/12/17: [new] support Big5. (REG_ENCODING_BIG5)
+2003/12/16: [impl] change CodePoint from unsigned int to unsigned long.
+2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15)
+2003/12/16: [impl] change P_() macro definition condition for Win32.
+2003/12/16: [dist] add sample/encode.c
+2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1)
+2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX.
+2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding.
+2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...)
+2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern().
+ change local var. type char* to UChar*.
+2003/12/15: [impl] remove REG_MBLEN_TABLE[].
+2003/12/15: [spec] rename function prefix regex_get_prev_char_head(),
+ regex_get_left_adjust_char_head() and
+ regex_get_right_adjust_char_head() to enc_xxxxxx().
+2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_.
+2003/12/12: [impl] remove USE_SBMB_CLASS.
+2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len().
+2003/12/12: [impl] rename WCINT to CodePoint.
+2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h.
+2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256].
+2003/12/11: [impl] remove RegAsciiCaseAmbigTable.
+2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison.
+2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. ***
+2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h.
+2003/12/08: [impl] add IS_ENC_WORD() to regenc.h.
+2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX().
+2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h.
+2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF.
+2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND |
+ REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF)
+ to RegSyntaxGrep.
+2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT.
+2003/12/04: [spec] move declarations of regex_get_default_encoding() and
+ regex_set_default_encoding() from oniguruma.h to regenc.h.
+2003/12/03: [new] add regex_get_default_encoding() and
+ regex_set_default_encoding().
+2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed.
+ (current default value, not initial default value.)
+2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX.
+2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"'
+2003/12/02: [spec] move regex_set_default_trans_table() declaration
+ from oniguruma.h to regenc.h. (obsoleted API)
+2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and
+ AmbiguityTable to regenc.c.
+2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c.
+2003/12/01: [dist] add regenc.h and regenc.c.
+2003/11/18: [dist] change testconv.rb.
+2003/11/18: [bug] (thanks Masaru Tsuda)
+ memory leak in parse_subexp().
+2003/11/18: [bug] (thanks Masaru Tsuda)
+ memory leak in names_clear() and parse_char_class().
+2003/11/17: [bug] memory leak in parse_char_class().
+2003/11/17: [bug] (thanks Masaru Tsuda)
+ OptExactInfo length should not over OPT_EXACT_MAXLEN.
+ (concat_opt_exact_info_str())
+
+2003/11/12: Version 1.9.5
+
+2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux].
+2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR.
+2003/11/11: [dist] add a sample program sample/sql.c.
+2003/11/11: [new] add variable meta character.
+ regex_set_meta_char()
+2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS.
+2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to
+ REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE,
+ REG_SYN_OP_QMARK_GROUP_EFFECT to
+ REG_SYN_OP2_QMARK_GROUP_EFFECT.
+2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode.
+2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT.
+2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to
+ REG_SYN_WARN_CC_OP_NOT_ESCAPED.
+2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func().
+2003/10/30: [new] add regex_name_to_backref_number().
+ (for multiplex definition name, see sample/names.c)
+2003/10/30: [spec] add name_end and reg argument to callback function of
+ regex_foreach_name(). (see sample/names.c)
+2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME.
+ add error code REGERR_MULTIPLEX_DEFINED_NAME.
+2003/10/14: [dist] modify sample/simple.c.
+2003/10/03: [bug] (thanks nobu) [ruby-dev:21472]
+ sub-anchor of optimization map info was wrong
+ in concat_left_node_opt_info().
+ ex. /^(x?y)/ =~ "xy" fail.
+
+2003/09/17: Version 1.9.4
+
+2003/09/17: [spec] change specification of char-class range in ignore case mode
+ follows with Ruby 1.8(2003-09-17).
+ ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i
+ ==> H-Z, h-z, 0x5b-0x60, a-c, A-C
+2003/09/16: [bug] (thanks Guy Decoux)
+ remove env->option == option check in parse_effect().
+ change env->option for dynamic option in parse_exp().
+ (ex. bad match /(?i)(?-i)a/ =~ "A")
+2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to
+ REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC,
+ REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC.
+2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR.
+2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to
+ REG_SYN_OP2_ESC_CAPITAL_M_BAR_META,
+ REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE,
+ REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP,
+ REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR,
+ REG_SYN_OP_ESC_GNU_BUF_ANCHOR to
+ REG_SYN_OP2_ESC_GNU_BUF_ANCHOR,
+ REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS,
+ REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD,
+ REG_SYN_OP_ESC_WORD_BEGIN_END to
+ REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END,
+ REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND,
+ REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE,
+ REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT,
+ REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC,
+ REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP,
+ REG_SYN_CONTEXT_INDEP_OPS to
+ REG_SYN_CONTEXT_INDEP_REPEAT_OPS,
+ REG_SYN_CONTEXT_INVALID_REPEAT_OPS to
+ REG_SYN_CONTEXT_INVALID_REPEAT_OPS.
+ add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR.
+2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR,
+ REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF,
+ REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF,
+ REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF,
+ REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF,
+ REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE,
+ REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE,
+ REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL,
+ REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL,
+ REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP,
+ REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT,
+ REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT,
+ REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY,
+ REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT,
+ REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to
+ REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL},
+ REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL,
+ REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP
+ and REG_SYN_OP2_ESC_K_NAMED_BACKREF.
+2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for
+ no-name group in noname_disable_map().
+ ex. /(a+)*(?<name>...)/
+2003/09/02: [impl] include <stdio.h> is forgotten to erase in regcomp.c.
+2003/09/01: [dist] update doc/RE and doc/RE.ja.
+2003/08/26: [bug] (thanks Guy Decoux)
+ should not double free node at the case TK_CC_CC_OPEN
+ in parse_char_class().
+
+2003/08/19: Version 1.9.3
+
+2003/08/19: [inst] change re.c.180.patch.
+2003/08/19: [impl] rename 'list of captures' to 'capture history'.
+2003/08/19: [dist] add doc/RE.ja. (Japanese)
+2003/08/19: [new] add regex_copy_syntax().
+2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to
+ REG_SYN_OP2_ATMARK_CAPTURE_HISTORY.
+2003/08/18: [spec] (thanks nobu)
+ don't use IMPORT in oniguruma.h and onigposix.h.
+2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb.
+2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in.
+2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1.
+2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0.
+2003/08/18: [bug] (thanks nobu)
+ single/multi-byte decision was wrong in parse_char_class().
+ add regex_wc2mblen().
+ should not set fetched to 1 in TK_RAW_BYTE case.
+2003/08/18: [bug] should update BitSet in the case inc_n >= 0
+ in add_wc_range_to_buf().
+2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c.
+2003/08/11: [bug] should clear region->list in regex_region_resize().
+
+2003/08/08: Version 1.9.2
+
+2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000
+ VC++ 6.0 and Cygwin.
+2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform,
+ because definition is added in win32\win32.h.
+2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux.
+2003/08/08: [dist] change re.c.180.patch and re.c.168.patch.
+2003/08/08: [new] (thanks akr)
+ implemented list of captures. (?@...), (?@<name>...)
+2003/08/07: [dist] add sample/listcap.c.
+2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at().
+ renewal of mem_start_stk[] should be after
+ STACK_PUSH_MEM_END() call.
+2003/07/29: [new] add regex_get_encoding(), regex_get_options() and
+ regex_get_syntax().
+2003/07/25: [spec] (thanks akr)
+ change group(...) to shy-group(?:...) if named group is
+ used in the pattern.
+ add REG_SYN_CAPTURE_ONLY_NAMED_GROUP.
+2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to
+ REG_OPTION_DONT_CAPTURE_GROUP.
+ add REG_OPTION_CAPTURE_GROUP.
+2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP.
+2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME.
+2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME
+ to REGERR_INVALID_CHAR_IN_GROUP_NAME.
+2003/07/17: [spec] restrict usable chars of group name to alphabet, digit,
+ '_' or multibyte-char in fetch_name(). [ruby-dev:20706]
+2003/07/16: [impl] minor change of sample/names.c.
+2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP.
+2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case.
+2003/07/14: [API] add regex_number_of_names().
+2003/07/08: [impl] change error message for undefined group number call.
+ 'undefined group reference: /(a)\g<2>/'
+ --> 'undefined group <2> reference: /(a)\g<2>/'
+2003/07/08: [dist] modify doc/RE.
+2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code.
+ add IS_DYNAMIC_OPTION() to regint.h.
+2003/07/07: [spec] called group should not ignore outside option (?i:...).
+ ex. /(?i:(?<n>(a)\2)){0}\g<n>/.match("aA")
+ add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC.
+ set option status to effect memory in optimize_node_left().
+2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and
+ OP_ANYCHAR_ML_START_PEEK_NEXT.
+2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1.
+2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE.
+
+2003/07/04: Version 1.9.1
+
+2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET)
+2003/07/04: [spec] check mbuf member in the case of
+ REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class().
+2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
+ should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED.
+2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and
+ REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu)
+2003/07/03: [spec] add REG_SYN_OP_ESC_CONTROL_CHAR flag.
+2003/07/03: [spec] remove REG_SYN_OP_ESC_OCTAL3 and REG_SYN_OP_ESC_X_HEX2
+ flag from RegSyntaxGnuRegex.
+2003/07/03: [spec] remove REG_SYN_OP_NON_GREEDY flag from RegSyntaxGnuRegex.
+2003/07/02: [dist] fix doc/RE.
+2003/07/01: [impl] add config flag USE_VARIABLE_SYNTAX.
+ (turn off variable syntax on Ruby)
+2003/07/01: [spec] add syntax behavior REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND.
+2003/06/30: [spec] allow different length top-level alternatives
+ in look-behind. ex. (?<=abc|abcd), (?<!a|bc)
+2003/06/26: [spec] add option REG_OPTION_NEGATE_SINGLELINE.
+2003/06/26: [spec] should default on REG_OPTION_SINGLELINE
+ for REG_SYNTAX_PERL and REG_SYNTAX_JAVA.
+2003/06/26: [impl] add options member to RegSyntaxType.
+2003/06/26: [spec] don't change the meaning of '\Z' for REG_OPTION_SINGLELINE.
+2003/06/25: [dist] don't use option REG_NEWLINE for sample/posix.c.
+2003/06/25: [dist] modify testconv.rb.
+ should match and convert double quoted string data.
+ ex. x(/\ca/, "\001", 0, 1)
+2003/06/25: [impl] add REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL and
+ REG_SYN_OP2_ESC_M_BAR_META.
+2003/06/25: [impl] add REG_SYN_OP_ESC_OCTAL3 and REG_SYN_OP_ESC_X_HEX2.
+2003/06/24: [impl] add REG_SYN_OP2_ESC_V_VTAB. (\v is VTAB)
+2003/06/24: [bug] should invert REG_OPTION_SINGLELINE flag
+ in REG_SYN_OP2_OPTION_PERL.
+2003/06/24: [impl] add REG_SYN_OP2_OPTION_PERL and REG_SYN_OP2_OPTION_RUBY.
+ meaning of (?m) and (?s) depends on syntax.
+
+2003/06/20: Version 1.9.0
+
+2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. (thanks akr)
+2003/06/19: [inst] rename regex.h to oniguruma.h.
+2003/06/18: [impl] change REG_EXTERN setting condition. (__CYGWIN__)
+2003/06/18: [bug] return wrong result UTF-8 case in regex_mb2wc().
+2003/06/18: [impl] add REG_SYN_OP2_POSSESSIVE_INTERVAL. a{n,m}+
+2003/06/18: [new] add REG_SYNTAX_JAVA.
+2003/06/18: [spec] add REG_SYN_OP_QUOTE.
+2003/06/18: [spec] add op2 member to RegSyntaxType.
+ rename some REG_SYN_OP_XXX to REG_SYN_OP2.
+2003/06/16: [new] Perl-like quotation operator \Q, \E.
+2003/06/16: [spec] should not control ignore case mode by escaped char.
+ ex. /\J/i =~ "j", /[\J]/i =~ "j" (same as Perl)
+2003/06/13: [bug] modify onigposix.h.
+2003/06/13: [bug] should use -DIMPORT for link with DLL in win32/Makefile.
+2003/06/13: [dist] add sample/names.c
+2003/06/12: [bug] range should be from - 1 in not_wc_range_buf().
+2003/06/12: [spec] should warn for '-' before '&&' operator in char-class.
+2003/06/12: [new] add REG_SYNTAX_PERL.
+2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
+2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]]
+2003/06/11: [new] char-class in char-class (as Java(TM)).
+2003/06/11: [spec] change AND operator in char-class from &&[..] to &&.
+2003/06/04: [spec] {n,m}+ should not be possessive operator.
+ ex. a{3}+ should be (?:a{3})+
+2003/06/03: [bug] should compare strings with min-length in is_not_included().
+2003/06/03: [impl] automatic possessivate optimization. a*b ==> (?>a*)b
+ (thanks Jeffrey E. F. Friedl)
+2003/06/02: [impl] remove multibyte-BitSet for OP_CCLASS_MB/OP_CCLASS_MB_NOT.
+2003/05/30: [new] char class intersection operator &&[...] like Java(TM).
+ (thanks akr)
+2003/05/30: [bug] should use bbuf_free() for CClassNode in regex_node_free().
+2003/05/29: [bug] wrong usage of syntax REG_SYN_ALLOW_EMPTY_RANGE_IN_CC.
+ /[d-a]/ should be error.
+2003/05/28: [impl] optimize stop-backtrack compiled code.
+ (/(?>a*)/, /(?>\w+)/ etc...)
+ add OP_POP opcode.
+2003/05/28: [new] possessive repeat operator. (?+, *+, ++, {n,m}+)
+2003/05/27: [spec] '-' at beginning of char-class should be warn only if
+ it is start of range. (ex. /[--a]/)
+2003/05/27: [spec] should not warn for right bracket at beginning of pattern.
+ ex. /]aaa/
+2003/05/27: [spec] change CCEND_ESC_WARN() from VERB_WARNING() to WARNING().
+2003/05/27: [spec] /[]aaa/ should be empty char-class error.
+ /[]aaa]/ should be warn for 'without backslash'.
+ (add char_exist_check() in regparse.c)
+2003/05/26: [bug] OP_REPEAT in recursive subexp call.
+ ex. /(?<n>(a|b\g<n>c){3,5})/.match("baaaaca") => "baaaaca"
+ was wrong result. (should be "aaaa")
+2003/05/26: [impl] add num_call member to regex_t.
+2003/05/26: [impl] add repeat_range member to regex_t.
+ (for delete upper,lower members from StackType.u.repeat)
+2003/05/26: [bug] change print_names() to external regex_print_names().
+2003/05/26: [tune] change OP_NULL_CHECK_END process in match_at().
+2003/05/26: [spec] change CCEND_ESC_WARN() from WARNING() to VERB_WARNING().
+2003/05/26: [spec] remove POSIXLINE option. (?p:...)
+ (be made the same as Ruby.)
+2003/05/22: [spec] use OP_NULL_CHECK_XXX only if repeat is infinite.
+ prev. /(?:()|()){0,10}\1\2/ =~ "" ==> FAIL
+ now /(?:()|()){0,10}\1\2/ =~ "" ==> MATCH
+
+2003/05/22: [impl] change target_empty setting condition in setup_tree().
+2003/05/19: [impl] avoid zero length repeat optimization. (thanks matz)
+ /()*/ ==> /()?/, /()+/ ==> /()/ etc...
+2003/05/19: [impl] minor changes for gcc -Wall. (-DREG_DEBUG_STATISTICS case)
+2003/05/19: [spec] rename regex_foreach_names() to regex_foreach_name().
+2003/05/16: [new] add --with-statistics option to configure.
+2003/05/16: [bug] move RegOpInfo[] definition to regint.h.
+2003/05/16: [new] add regex_version().
+
+2003/05/14: Version 1.8.6
+
+2003/05/14: [bug] use _vsnprintf() on Win32.
+2003/05/14: [spec] define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE.
+ (/\n$/ =~ "\n", /\n\Z/ =~ "\n") [ruby-dev:20125]
+2003/05/14: [impl] minor changes for gcc -Wall.
+2003/05/14: [impl] add string.h check in AC_CHECK_HEADERS().
+2003/05/13: [impl] minor changes for gcc -Wall.
+2003/05/13: [impl] add regex_snprintf_with_pattern().
+2003/05/13: [spec] add warning for char class meta character without escape
+ in Ruby mode ('[', '-', ']').
+2003/05/13: [impl] define WARNING() and VERB_WARNING() in regint.h.
+2003/05/13: [bug] correct is_code_ascii() for /[[:ascii:]]/.
+2003/05/12: [dist] add regular expression document (doc/RE).
+2003/05/12: [spec] specification of $(END_LINE) was made the same as Ruby 1.8.
+ [ruby-dev:20130] (thanks matz)
+2003/05/12: [memo] shifted to Subversion(version 0.21.0) from CVS.
+
+2003/03/19: Version 1.8.5
+
+2003/03/19: [impl] change REG_EXTERN definition. (thanks nobu)
+2003/03/19: [impl] abbreviation for long error_par in regex_error_code_to_str().
+2003/03/18: [dist] change re.c.XXX.patch for GNU regex API changes.
+2003/03/18: [spec] change API regex_new(), regex_recompile() and
+ regex_error_code_to_str().
+ change API re_compile_pattern() and re_recompile_pattern().
+2003/03/18: [spec] replace REGERR_END_PATTERN_AT_GROUP_{COMMENT|OPTION} to
+ REGERR_END_PATTERN_IN_GROUP.
+2003/03/17: [impl] should free err_arg.
+2003/03/17: [bug] mistake(high -> to) in add_wc_range_to_buf().
+2003/03/17: [spec] add err_arg argument to regex_new() and regex_recompile().
+ for detail error message. (thanks akr)
+
+2003/03/12: Version 1.8.4
+
+2003/03/12: [tune] use cached value of effect node in get_min_match_length().
+2003/03/12: [bug] escaped alphabet should be TK_RAW_BYTE
+ in fetch_token() and fetch_token_in_cc().
+2003/03/12: [spec] change named backref and subexp call format.
+ backref: \k<name>, call: \g<name> (thanks akr)
+2003/03/11: [inst] add regparse.[ch] in win32/Makefile.
+2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set
+ then compile error in unset_addr_list_fix(). (thanks knu)
+2003/03/10: [impl] divide regcomp.c to regcomp.c, regparse.c and regparse.h.
+2003/03/10: [bug] should handle multi-byte code name in fetch_name().
+2003/03/10: [spec] remove REGERR_TABLE_FOR_IGNORE_CASE_IS_NOT_SETTED.
+2003/03/10: [spec] support POSIX API option REG_NOSUB.
+ add comp_options member to POSIX API regex_t.
+
+2003/03/10: Version 1.8.3
+
+2003/03/10: [bug] can not compile with Ruby 1.6.8.
+ (inconsistent st.h with 1.6 and 1.8)
+ use hash table on Ruby 1.8 only.
+2003/03/10: [spec] forbid to use '\' in group name.
+2003/03/08: [impl] remove check_backref_number().
+2003/03/08: [bug] called group in 0-repeat should not be eliminated from
+ compile code. ex. /(?*n)(?<n>){0}/ (thanks akr)
+ add is_refered member to QualifierNode.
+2003/03/07: [impl] use hash table(st.[ch]) for implementation of name table.
+ (enable on Ruby in default)
+2003/03/07: [new] add regex_foreach_names().
+2003/03/06: [impl] add member reg->stack_pop_level.
+2003/03/06: [impl] add operator OP_MEMORY_START and member reg->backtrack_mem.
+2003/03/06: [bug] if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY,
+ should handle backtrack of MEM_END.
+ add OP_MEMORY_END_PUSH and OP_MEMORY_END_PUSH_REC.
+2003/03/06: [impl] rename OP_MEMORY_END_PUSH to OP_MEMORY_END_MARK.
+2003/03/06: [spec] change error messages.
+2003/03/06: [tune] add tiny_pop check in STACK_POP.
+
+2003/03/05: Version 1.8.2
+
+2003/03/05: [impl] use cache info in EFFECT_MEMORY case
+ in optimize_node_info().
+2003/03/05: [impl] add EFFECT_MEMORY node reference count check
+ in optimize_node_left().
+2003/03/05: [impl] add min-len, max-len, char-len cache in EffectNode.
+2003/03/05: [spec] allow to call in look behind. ex. /(?<=(?*a))/
+2003/03/05: [bug] forgotten N_ANCHOR case in check_backref_number(),
+ subexp_inf_recursive_check_trav() etc...
+2003/03/05: [impl] rename USE_ONIGURUMA_EXTENSION to USE_SBMB_CLASS.
+2003/03/04: [impl] add CALL-node info in optimize_node_left().
+2003/03/04: [spec] prohibit left recursion of subexp call. ex. (?<n>|(?*n)a)
+ add subexp_inf_recursive_check_trav().
+2003/03/04: [spec] rename REG_SYN_STRICT_CHECK_BACKREF_NUMBER
+ to REG_SYN_STRICT_CHECK_BACKREF
+2003/03/03: [bug] /(?<n>a(?*n)|)/ isn't infinite recursion.
+ fix N_LIST case in subexp_recursive_check(). (thanks akr)
+2003/03/03: [bug] /(?<n>|(?*n))+/ segmentation fault.
+ should re-allocate in unset_addr_list_add(). (thanks akr)
+
+2003/03/01: Version 1.8.1
+
+2003/03/01: [bug] change STACK_GET_MEM_START() and STACK_PUSH_MEM_END().
+2003/03/01: [new] add reg_name_to_group_numbers() to POSIX API.
+2003/03/01: [impl] use OP_MEMORY_END_PUSH in callable subexp compiled code
+ only if subexp is recursive.
+2003/03/01: [spec] rename regex_name_to_backrefs() to
+ regex_name_to_group_numbers().
+2003/02/28: [impl] use function stack_double() instead of macro.
+2003/02/28: [new] subexp call. (?*name) (thanks akr)
+2003/02/28: [spec] add match stack limit check. (MATCH_STACK_LIMIT_SIZE)
+2003/02/28: [impl] check recursive subexp call.
+2003/02/28: [impl] add opcode OP_MEMORY_END_PUSH for callable subexp.
+2003/02/28: [impl] add opcode OP_CALL, OP_RETURN.
+ add stack type STK_CALL_FRAME, STK_RETURN, STK_MEM_END.
+2003/02/26: [spec] add new syntax behavior REG_SYN_STRICT_CHECK_BACKREF_NUMBER.
+ if it is set, then error /(\1)/, /\1(..)/ etc...
+2003/02/26: [spec] if backref number is greater than max group number,
+ then return compile error. (REGERR_INVALID_BACKREF_NUMBER)
+2003/02/26: [tune] badly implemented N_ALT case in get_min_match_length().
+2003/02/26: [dist] auto update testc.c and win32/testc.c in dist target.
+2003/02/26: [impl] add -win option to testconv.rb.
+2003/02/25: [spec] allow to assign same name to different group.
+ add OP_BACKREF_MULTI.
+2003/02/24: [impl] reduce redundant repeat of empty target.
+ ex. /()*/ ==> /()?/, /()+/ ==> /()/, /(?:)+/ ==> //
+2003/02/24: [impl] change condition in regex_is_allow_reverse_match().
+2003/02/24: [impl] convert i(/../, ...) functions in testconv.rb.
+2003/02/24: [impl] change name table struct.
+
+2003/02/22: Version 1.8.0
+
+2003/02/22: [new] named subexp, named back reference. (thanks akr)
+ define: (?<name>...), back-ref: \g<name>
+2003/02/22: [impl] use str_node_can_be_split().
+2003/02/21: [dist] add sample/posix.c
+2003/02/21: [spec] rename some error code symbols.
+2003/02/21: [spec] max number of multibyte ranges(255) is small.
+ 255 --> 1000. (thanks MoonWolf)
+2003/02/20: [new] supported Basic Regular Expression(BRE) in POSIX API.
+ (REG_EXTENDED option: Extended RE)
+2003/02/20: [new] variable syntax.
+
+2003/02/12: Version 1.7.2
+
+2003/02/12: [bug] mismatch /\?a/i.match('?A').
+ check raw value in scan_make_node() and scan_backslash().
+ (thanks Nobu)
+2003/02/12: [impl] rename 'max_mem' to 'num_mem' in regex_t.
+2003/02/12: [impl] rename 'code' to 'enc' in regex_t.
+2003/02/12: [spec] remove transtable argument in regex_new and regex_recompile.
+ remove transtable member in regex_t.
+2003/02/10: [inst] change backup file suffix name from '.orig' to '.ruby_orig'.
+ (win32/Makefile)
+2003/02/10: [spec] number check in scan_char_class() ignore-case mode.
+ ex. /[\x58-\x64]/i
+2003/02/10: [impl] don't use OP_MEMORY_END_PUSH (and STK_MEM_END).
+2003/02/10: [impl] lift up head_exact value from child qualifier node to parent.
+2003/02/10: [tune] change stack type values.
+2003/02/10: [dist] add HISTORY.
+2003/02/08: [tune] change stack type values.
+2003/02/08: [tune] add STACK_BASE_CHECK().
+2003/02/08: [tune] add STACK_PUSH_ENSURED().
+2003/02/08: [dist] change contents of doc/API.
+2003/02/07: [inst] change backup file suffix name from '.orig' to '.ruby_orig'.
+2003/02/07: [spec] range in char-class should be same spec. with Ruby
+ in ignore-case mode. (ex. /[A-c]/i == /[a-c]/i)
+ (thanks MoonWolf)
+2003/02/07: [spec] [!--] should be allowed. (thanks MoonWolf)
+2003/02/07: [dist] refresh re.c.180.patch for re.c (2003-02-06).
+
+2003/02/07: Version 1.7.1
+
+2003/02/07: [impl] check first byte of string in ignore-case mode.
+ (get_head_exact_node())
+2003/02/07: [impl] remove redundant statements in setup_tree().
+2003/02/06: [new] create Win32 DLL.
+2003/02/06: [impl] use P_() macro for function prototype.
+2003/02/06: [impl] add HAVE_PROTOTYPE, HAVE_STDARG_PROTOTYPES in
+ configure.in and config.h.in.
+2003/02/06: [spec] /[0-9-a]/ is allowed as usual char '-' and 'a' in Ruby.
+ add USE_BETTER_COMPATIBILITY_FOR_ORIGINAL_REGEX in
+ regint.h. (thanks MoonWolf)
+2003/02/06: [spec] rename REG_MBCTYPE_XXXX to REG_ENCODING_XXXX in onigposix.h.
+2003/02/05: [spec] rename MBCTYPE_XXXX to REG_MBCTYPE_XXXX in onigposix.h.
+2003/02/05: [spec] add POSIX API error REG_EONIG_THREAD to onigposix.h.
+2003/02/05: [dist] add .cvsignore file.
+
+2003/02/04: Version 1.7
+
+2003/02/04: [bug] typo miss in regex_region_copy().
+2003/02/04: [impl] change THREAD_PASS macro. (regint.h)
+2003/02/04: [dist] add API document file doc/API.
+2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then
+ set REG_OPTIMIZE_EXACT_BM in set_optimize_exact_info().
+2003/02/04: [spec] reimplement regex_clone() and it is obsoleted.
+2003/02/04: [bug] add REGERR_OVER_THREAD_PASS_LIMIT_COUNT
+ to regerror.c regposix.c.
+2003/02/03: [bug] Hankaku-Kana may be second byte in Shift_JIS
+ regex_is_allow_reverse_match().
+2003/02/03: [impl] add optimization type REG_OPTIMIZE_EXACT_BM_NOT_REV.
+ remove exact_allow_reverse_match member in regex_t.
+2003/02/03: [impl] add exact_allow_reverse_match member in regex_t.
+2003/02/03: [impl] compile-search conflict in regex_search() is handled.
+2003/02/01: [tune] decrease regex_region_clear() calling from regex_search().
+2003/02/01: [tune] remove region argument from match_at().
+2003/01/31: [tune] don't use strlen() in regexec() and regcomp().
+2003/01/31: [tune] decrease regex_reduce_chain() calling in regex_search().
+2003/01/31: [bug] STRING_CMP() in regexec.c was wrong in ignore-case.
+2003/01/31: [impl] convert to lower-case char at string compile time.
+ change SBTRANSCMP() in regexec.c.
+2003/01/31: [impl] rename TTRANS() to TOLOWER().
+2003/01/30: [bug] .c.o --> .c.obj in win32\Makefile.
+2003/01/30: [impl] add -DNOT_RUBY to Makefile.in.
+ NOT_RUBY is referred to in regint.h to avoid double
+ inclusion of config.h.
+2003/01/30: [impl] when string hasn't case ambiguity, don't compile
+ to ignore case opcode.
+2003/01/29: [impl] add SJIS, UTF-8 test_sb() test.
+2003/01/29: [dist] add INSTALL-RUBY file.
+2003/01/28: [test] success in Cygwin, Ruby 1.8.0 (2003-01-27).
+2003/01/24: [inst] add rback target to Makefile.in.
+2003/01/24: [impl] change SBCMP() -> IS_NEWLINE() in match_at().
+2003/01/23: [impl] add encoding arg to scan_xxxx_number().
+2003/01/23: [impl] rename WCInt to WCINT.
+2003/01/22: [bug] POSIX API regexec() was not thread safe.
+ remove region member from POSIX regex_t.
+ [new] add search time option REG_OPTION_POSIX_REGION.
+ (region argument is treated as regmatch_t[] type)
+ speed up regexec().
+2003/01/22: [memo] start CVS entry in my box.
+
+2003/01/21: Version 1.6
+
+2003/01/21: [test] Mac OS X 10.1, Ruby 1.8.0 (2003-01-20)
+2003/01/20: [impl] add UTF-8 check to test.rb. (thanks UENO Katsuhiro)
+2003/01/18: [impl] change REGION_NOTPOS to REG_REGION_NOTPOS in regex.h.
+2003/01/17: [dist] add sample/simple.c.
+2003/01/17: [inst] add configure option --with-rubydir.
+2003/01/17: [bug] badly implemented POSIX API options.
+ default: /./ not match "\n", anchor not match "\n"
+ REG_NEWLINE: /./ not match "\n", anchor match "\n"
+2003/01/16: [impl] rewrite POSIX API regexec() for speed up.
+2003/01/16: [impl] add region member to POSIX regex_t struct.
+2003/01/16: [inst] rename library file from 'libregex.a' to 'libonig.a'.
+2003/01/15: [dist] add testc.c to distribution file.
+2003/01/15: [test] success in 'make rtest/ctest/ptest' on Windows 2000.
+2003/01/15: [bug] change '/' to \' in win32/Makefile.
+2003/01/14: [test] success in Ruby make test on Windows 2000.
+ VC++6.0, Ruby 1.6.8 (2003-01-12)
+2003/01/14: [inst] change Makefile.in and win32/Makefile.
+2003/01/11: [inst] changes for Win32 platform. (regint.h, reggnu.c, regcomp.c)
+2003/01/11: [dist] add win32 directory. (config.h, Makefile, testc.c)
+2003/01/10: [inst] add onigposix.h to install target. (Makefile.in)
+2003/01/10: [bug] lacked a comma in ESTRING[]. (regposerr.c)
+2003/01/10: [bug] local variable name was wrong. buf -> tbuf (regerror())
+2003/01/10: [spec] remove REG_RUBY_M17N case from onigposix.h and regposix.c.
+
+2003/01/09: Version 1.5
+
+2003/01/09: [inst] replace Ruby re.c.XXX.patch files. (166 -> 168, 172 -> 180)
+2003/01/09: [new] implement POSIX API. (thanks knu)
+ (onigposix.h, regposix.c, regposerr.c)
+2003/01/08: [spec] remove REGERR_END_PATTERN_AFTER_BACKSLASH in regex.h.
+2003/01/08: [spec] region arg can be NULL in regex_search() and regex_match().
+
+2003/01/08: Version 1.4
+
+2003/01/08: [inst] add test program converter (test.rb -> testc.c).
+2003/01/08: [bug] move GET_WCINT() from regcomp.c to regint.h.
+2003/01/07: [inst] add new test script (test.rb).
+2002/12/30: [bug] wrong merge in multibyte mode (alt_merge_opt_exact_info()).
+2002/12/28: [inst] add rtest target to Makefile.in.
+2002/12/28: [bug] /\xfe/.match("\xfe") mismatch in multibyte mode.
+ add "raw" flag arg to concat_opt_exact_info_str().
+2002/12/25: [bug] check condition was wrong in alt_merge_opt_map_info().
+2002/12/25: [impl] add threshold_len check in regex_search().
+2002/12/23: [bug] prec-read in alternative (/a|(?=z).f/.match("zf") => nil)
+2002/12/23: [bug] \G in alternative (/a|\Gz/.match("bza") => "z").
+ add start member in MatchArg. (regexec.c)
+2002/12/21: [impl] **** rewrite all optimization process. ****
+2002/12/16: [impl] remove node subtype EFFECT_EMPTY.
+2002/12/12: [impl] reconstruct node types. (regcomp.c)
+2002/12/11: [impl] add regerror.c
+2002/12/10: [bug] [ruby-dev:19042] (thanks Nobu)
+ anchor(\G etc...) influenced outside of "|". (/a|\Gb/)
+2002/11/30: [bug] [ruby-dev:18966] (thanks Nobu)
+ char-class(\S, [^\s] etc...) optimize map-info was wrong.
+2002/11/29: [bug] infinite loop on NULL-pointer str search (regex_search()).
+ (thanks matz)
+2002/11/29: [bug] change static -> extern (regex_chain_reduce()).
+2002/11/29: [bug] change encoding to RegDefaultCharEncoding
+ in re_recompile_pattern(). (adapt to re.c)
+2002/04/24: [spec] USE_ONIGURUMA_EXTENSION is disabled in default.
+2002/04/24: [new] add searching time option: REG_OPTION_NOTBOL/NOTEOL.
+ add searching time option argument to regex_search() and
+ regex_match(). (prepare for POSIX API)
+2002/04/20: [impl] divide regex.c file into regcomp.c, regexec.c, reggnu.c
+ and regint.h.
+2002/04/09: [impl] move IS_MULTILINE() to outside of loop in OP_ANYCHAR_STAR.
+2002/04/08: [impl] don't use OP_REPEAT operator for '??'.
+2002/04/06: [impl] reduce redundant nested repeat operators(?,*,+,??,*?,+?).
+ ex. (?:a*)?, (?:a??)* etc..
+2002/04/06: [spec] should not warn for /(?:a?)+?/.
+2002/04/04: [spec] should allow fixed length alternative and repeat pattern
+ in look-behind. ex. /(?<=(a|b){3})/ (thanks Guy Decoux)
+2002/04/02: [spec] should warn for /(?:a+)?/ and /(?:a*)??/. (thanks akr)
+
+2002/04/01: Version 1.3
+
+2002/04/01: [dist] add COPYING.
+2002/03/30: [spec] warn redundant nested repeat operator
+ in Ruby verbose mode. ex. (?:a*)?
+2002/03/30: [spec] nested repeat operator error check should be
+ same with GNU regex. (thanks Guy Decoux)
+2002/03/30: [new] add \x{hexadecimal-wide-char}. (thanks matz)
+2002/03/27: [bug] MBCTYPE_XXX symbol values should be same with GNU regex.
+2002/03/27: [impl] add THREAD_ATOMIC to regex_clone(), regex_init(), regex_end().
+2002/03/25: [spec] if encoding is utf-8, allow combination of singlebyte and
+ multibyte code range in char class.
+ (cancelled 2002/04/01: for M17N compatibility)
+2002/03/25: [dist] description of the license condition is added to README.
+2002/03/23: [bug] should set all bits of reg->mem_stats,
+ if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY.
+2002/03/23: [new] add a new option REG_OPTION_NOT_EMPTY.
+2002/03/20: [spec] allow incomplete left brace as a usual char.
+ ex. /{/, /({)/, /a{2,3/ etc...
+2002/03/20: [impl] serialize integer in bytecode.
+ (switch by UNALIGNED_WORD_ACCESS in regex.c)
+2002/03/20: [impl] change re_mbcinit() for REG_RUBY_M17N.
+2002/03/19: [impl] word alignment of char class multi-byte code ranges.
+2002/03/19: [impl] replace OP_EXACTMB4N with OP_EXACTMB3N.
+2002/03/19: [bug] OP_CCLASS_MB_NOT process in matchAt() is wrong.
+2002/03/19: [new] add re_mbctab[] for Ruby extension library compatibility.
+2002/03/19: [spec] allow nested repeat operator, if operator is {n,m} type.
+2002/03/19: [new] add REG_IS_PATTERN_ERROR(ecode) in regex.h
+2002/03/18: [spec] /[a-b-c]/ should be error.
+2002/03/18: [bug] /[\w-a]/ should be error. (thanks Guy Decoux)
+2002/03/18: [bug] /[\]/ should be error. (thanks Guy Decoux)
+2002/03/18: [bug] /()*/ etc.. should not be error. (thanks Guy Decoux)
+2002/03/18: [spec] /a{1}*/ should not be error. (thanks Guy Decoux)
+2002/03/18: [bug] ab{2}{3} was interpreted as (?:a(?:b{2})){3}
+ (thanks Guy Decoux)
+2002/03/18: [bug] abort /(?i)*a/ etc... (thanks Guy Decoux)
+2002/03/18: [bug] abort /a|*/,/a|{1}/ etc... (thanks Guy Decoux)
+
+2002/03/13: Version 1.2
+
+2002/03/13: [test] success in rubicon/builtin/AllBuiltinTests.rb.
+ (thanks rubicon)
+2002/03/13: [bug] OP_EXACTMBN process in matchAt() is wrong.
+2002/03/13: [bug] start argument of BackwardSearchRange() is wrong.
+2002/03/12: [spec] change function name style from CamelCase
+ to underline_separation. (includes API)
+2002/03/12: [bug] if pattern has nested null-check, cause infinite loop.
+ correct STACK_NULL_CHECK() macro. (thanks Guy Decoux)
+2002/03/11: [bug] it is wrong that four numbers to continue as
+ an octal value in scanBackSlash(). ex. /\0111/
+ (thanks matz)
+2002/03/11: [new] \k (single-byte word char), \K (multi-byte char).
+2002/03/09: [inst] add two targets to Makefile.in (166 and 172).
+2002/03/09: [spec] decrease REG_MAX_BACKREF_NUM, REG_MAX_REPEAT_NUM
+ values.
+2002/03/08: [spec] allow use of "\A"(begin-buf) in look-behind.
+2002/03/08: [impl] add a new opcode OP_PUSH_IF_PEEK_NEXT.
+2002/03/08: [impl] add a new opcode OP_ANYCHAR_STAR_PEEK_NEXT.
+2002/03/07: [spec] prohibit use of capture group "(...)"
+ in negative look-behind.
+2002/03/07: [inst] add configure.in, config.h.in, Makefile.in.
+2002/03/07: [impl] call Init_REGEX_STAT() in RegexInit().
+2002/03/07: [spec] less length string match with negative look-behind.
+ ex. /(?<!XXX)a/.match("Xa"). (thanks Nobu)
+2002/03/06: [impl] expand repeated string, if expanded length <= 100.
+ ex. /(?:abc){10}/
+2002/03/06: [new] add a symbol REG_TRANSTABLE_USE_DEFAULT in regex.h.
+2002/03/06: [impl] rename RegDefaultCharCode to RegDefaultCharEncoding.
+2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens
+ in ScanMakeNode(). (beware of strchr(). thanks Nobu)
+2002/03/06: [bug] range argument of ForwardSearchRange() is wrong.
+ ex. /\A.a/, /\G.a/ mismatched with "aa". (thanks Nobu)
+2002/03/05: [new] add RegexMatch() API. rename regexMatch() to matchAt().
+2002/03/05: [impl] change function definition style.
+2002/03/05: [impl] abolish use of macro symbols whose names begin with an underscore.
+2002/03/04: [bug] make up a break-statement in compileTree().
+ (compile error on Mac OS X 10.1.3)
+
+2002/03/04: Version 1.1
+
+2002/03/04: [impl] replace STK_BOTTOM with STK_ALT.
+2002/03/02: [impl] add new opcode OP_FINISH and new stack type
+ STK_BOTTOM for (little bit) speed up STACK_POP.
+2002/03/02: [impl] add new opcode OP_EXACT1_IC, OP_EXACTN_IC
+ for compile time ignore case check.
+ remove opcode OP_EXACT1_RAW, OP_EXACTN_RAW.
+2002/03/02: [impl] add OpTime info to statistical data.
+2002/02/28: [bug] sub_anchor($) in ForwardSearch() and BackwardSearch().
+ ex. /$\x0az/.match("\nz")
+2002/02/28: [new] look-behind (?<=pattern), (?<!pattern).
+2002/02/27: [bug] use StackIndex instead of StackType* for realloc problem.
+2002/02/27: [impl] use m17n_codepoint() as mb2wc() in REG_RUBY_M17N.
+2002/02/27: [spec] undefined POSIX bracket /[[:xyz:]]/ should be syntax error.
+2002/02/26: [bug] ex. /$*/, /[a-]/, /((?i)a)b/ (thanks matz)
+
+2002/02/25: Version 1.0 (first release)
+
+--
+[bug: bug fix]
+[API: API change/new/delete]
+[new: new feature]
+[spec: specification change]
+[impl: implementation change]
+[tune: tune for speed up]
+[inst: changes for installation]
+[dist: distribution change]
+[test: test]
+[memo: memo]
+--
+<CVS: show all tags>
+cvs history -T
+
+<CVS: add tag>
+cvs rtag "VERSION_X_X_X" oniguruma
+
+
+<GNU Autotools: bootstrap>
+* write Makefile.am and configure.in.
+> aclocal
+> libtoolize
+> automake --foreign --add-missing
+> autoconf
+> configure --with-rubydir=... CFLAGS="-O2 -Wall"
+
+
+<GNU libtool: version management>
+
+ VERSION = current:revision:age
+
+ current: interface number (from 0)
+ revision: implementation number of same interface (from 0)
+ age: number of supported previous interfaces
+ (if current only supported then age == 0)
+
+//END
diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README
new file mode 100644
index 0000000..dff7fba
--- /dev/null
+++ b/ext/mbstring/oniguruma/README
@@ -0,0 +1,189 @@
+README 2007/06/18
+
+Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+
+http://www.geocities.jp/kosako3/oniguruma/
+http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
+
+Oniguruma is a regular expressions library.
+The characteristic of this library is that a different character encoding
+can be specified for every regular expression object.
+
+Supported character encodings:
+
+ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
+ EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
+ ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+
+* GB 18030: contributed by KUBO Takehiro
+* KOI8 is not included in library archive by default setup.
+ (need to edit Makefile if you want to use it.)
+------------------------------------------------------------
+
+Install
+
+ Case 1: Unix and Cygwin platform
+
+ 1. ./configure
+ 2. make
+ 3. make install
+
+ * uninstall
+
+ make uninstall
+
+ * test (ASCII/EUC-JP)
+
+ make atest
+
+ * configuration check
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
+
+
+
+ Case 2: Win32 platform (VC++)
+
+ 1. copy win32\Makefile Makefile
+ 2. copy win32\config.h config.h
+ 3. nmake
+
+ onig_s.lib: static link library
+ onig.dll: dynamic link library
+
+ * test (ASCII/Shift_JIS)
+ 4. copy win32\testc.c testc.c
+ 5. nmake ctest
+
+
+
+License
+
+ When this software is used together with Ruby or distributed with Ruby,
+ it follows the license of Ruby.
+ In all other cases, it follows the BSD license.
+
+
+
+Regular Expressions
+
+ See doc/RE (or doc/RE.ja for Japanese).
+
+
+Usage
+
+ Include oniguruma.h in your program. (Oniguruma API)
+ See doc/API for Oniguruma API.
+
+ If you want to disable UChar type (== unsigned char) definition
+ in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
+ include oniguruma.h.
+
+ If you want to disable regex_t type definition in oniguruma.h,
+ define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
+
+ Example of the compiling/linking command line in Unix or Cygwin,
+ (prefix == /usr/local case)
+
+ cc sample.c -L/usr/local/lib -lonig
+
+
+ If you want to use static link library(onig_s.lib) in Win32,
+ add option -DONIG_EXTERN=extern to C compiler.
+
+
+
+Sample Programs
+
+ sample/simple.c example of the minimum (Oniguruma API)
+ sample/names.c example of the named group callback.
+ sample/encode.c example of some encodings.
+ sample/listcap.c example of the capture history.
+ sample/posix.c POSIX API sample.
+ sample/sql.c example of the variable meta characters.
+ (SQL-like pattern matching)
+ sample/syntax.c Perl, Java and ASIS syntax test.
+
+
+Source Files
+
+ oniguruma.h Oniguruma API header file. (public)
+ onig-config.in configuration check program template.
+
+ regenc.h character encodings framework header file.
+ regint.h internal definitions
+ regparse.h internal definitions for regparse.c and regcomp.c
+ regcomp.c compiling and optimization functions
+ regenc.c character encodings framework.
+ regerror.c error message function
+ regext.c extended API functions. (deluxe version API)
+ regexec.c search and match functions
+ regparse.c parsing functions.
+ regsyntax.c pattern syntax functions and built-in syntax definitions.
+ regtrav.c capture history tree data traverse functions.
+ regversion.c version info function.
+ st.h hash table functions header file
+ st.c hash table functions
+
+ oniggnu.h GNU regex API header file. (public)
+ reggnu.c GNU regex API functions
+
+ onigposix.h POSIX API header file. (public)
+ regposerr.c POSIX error message function.
+ regposix.c POSIX API functions.
+
+ enc/mktable.c character type table generator.
+ enc/ascii.c ASCII encoding.
+ enc/euc_jp.c EUC-JP encoding.
+ enc/euc_tw.c EUC-TW encoding.
+ enc/euc_kr.c EUC-KR, EUC-CN encoding.
+ enc/sjis.c Shift_JIS encoding.
+ enc/big5.c Big5 encoding.
+ enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro)
+ enc/koi8.c KOI8 encoding.
+ enc/koi8_r.c KOI8-R encoding.
+ enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
+ enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
+ enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
+ enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
+ enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
+ enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
+ enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
+ enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
+ enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
+ enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
+ enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
+ enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
+ enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
+ enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
+ enc/iso8859_16.c ISO-8859-16 encoding.
+ (Latin-10 or South-Eastern European with Euro)
+ enc/utf8.c UTF-8 encoding.
+ enc/utf16_be.c UTF-16BE encoding.
+ enc/utf16_le.c UTF-16LE encoding.
+ enc/utf32_be.c UTF-32BE encoding.
+ enc/utf32_le.c UTF-32LE encoding.
+ enc/unicode.c Unicode information data.
+
+ win32/Makefile Makefile for Win32 (VC++)
+ win32/config.h config.h for Win32
+
+
+
+API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
+
+ + re_compile_fastmap() is removed.
+ + re_alloc_pattern() is added.
+
+
+
+I'm thankful to Akinori MUSHA.
+
+
+Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja
new file mode 100644
index 0000000..2dee793
--- /dev/null
+++ b/ext/mbstring/oniguruma/README.ja
@@ -0,0 +1,192 @@
+README.ja 2007/06/18
+
+µ´¼Ö ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+
+http://www.geocities.jp/kosako3/oniguruma/
+http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
+
+µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£
+¤³¤Î¥é¥¤¥Ö¥é¥ê¤ÎÆÃĹ¤Ï¡¢¤½¤ì¤¾¤ì¤ÎÀµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤´¤È¤Ë
+ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò»ØÄê¤Ç¤­¤ë¤³¤È¤Ç¤¢¤ë¡£
+
+¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°:
+
+ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
+ EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
+ ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+
+* GB 18030: µ×ÊÝ·òÍλáÄó¶¡
+* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
+ (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
+------------------------------------------------------------
+
+¥¤¥ó¥¹¥È¡¼¥ë
+
+ ¥±¡¼¥¹£±: Unix¤ÈCygwin´Ä¶­
+
+ 1. ./configure
+ 2. make
+ 3. make install
+
+ ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
+
+ make uninstall
+
+ ưºî¥Æ¥¹¥È (ASCII/EUC-JP)
+
+ make atest
+
+
+ ¹½À®³Îǧ
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
+
+
+
+ ¥±¡¼¥¹£²: Win32(VC++)´Ä¶­
+
+ 1. copy win32\Makefile Makefile
+ 2. copy win32\config.h config.h
+ 3. nmake
+
+ onig_s.lib: static link library
+ onig.dll: dynamic link library
+
+ * ưºî¥Æ¥¹¥È (ASCII/Shift_JIS)
+ 4. copy win32\testc.c testc.c
+ 5. nmake ctest
+
+
+¥é¥¤¥»¥ó¥¹
+
+ ¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤¬Ruby¤È°ì½ï¤Ë»ÈÍѤޤ¿¤ÏÇÛÉÛ¤µ¤ì¤ë¾ì¹ç¤Ë¤Ï¡¢
+ Ruby¤Î¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
+ ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¡¢BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
+
+
+Àµµ¬É½¸½
+
+ doc/RE.ja¤ò»²¾È
+
+
+»ÈÍÑÊýË¡
+
+ »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£
+ Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç
+ ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬
+ Í­¸ú¤Ë¤Ê¤ë¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦
+ ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í­¸ú¤Ë¤Ê¤ë¡£
+
+ Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§
+ (prefix¤¬/usr/local¤Î¤È¤­)
+ cc sample.c -L/usr/local/lib -lonig
+
+ GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ
+ ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£
+ ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î
+ ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
+
+
+ Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
+ ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤­¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£
+
+
+»ÈÍÑÎã¥×¥í¥°¥é¥à
+
+ sample/simple.c ºÇ¾®Îã (Oniguruma API)
+ sample/names.c ̾Á°ÉÕ¤­¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã
+ sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã
+ sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
+ sample/posix.c POSIX API»ÈÍÑÎã
+ sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
+ sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È
+
+
+¥½¡¼¥¹¥Õ¥¡¥¤¥ë
+
+ oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«)
+ onig-config.in onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È
+
+ regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ
+ regint.h ÆâÉôÀë¸À
+ regparse.h regparse.c¤Èregcomp.c¤Î¤¿¤á¤ÎÆâÉôÀë¸À
+ regcomp.c ¥³¥ó¥Ñ¥¤¥ë¡¢ºÇŬ²½´Ø¿ô
+ regenc.c ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤß
+ regerror.c ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô
+ regext.c ³ÈÄ¥API´Ø¿ô
+ regexec.c ¸¡º÷¡¢¾È¹ç´Ø¿ô
+ regparse.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥ó²òÀÏ´Ø¿ô
+ regsyntax.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ´Ø¿ô¡¢Áȹþ¤ßʸˡÄêµÁ
+ regtrav.c Êá³ÍÍúÎòÌÚ½ä²ó´Ø¿ô
+ regversion.c ÈǾðÊó´Ø¿ô
+ st.h ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ôÀë¸À
+ st.c ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ô
+
+ oniggnu.h GNU regex API¥Ø¥Ã¥À (¸ø³«)
+ reggnu.c GNU regex API´Ø¿ô
+
+ onigposix.h POSIX API¥Ø¥Ã¥À (¸ø³«)
+ regposerr.c POSIX API¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô
+ regposix.c POSIX API´Ø¿ô
+
+ enc/mktable.c ʸ»ú¥¿¥¤¥×¥Æ¡¼¥Ö¥ëÀ¸À®¥×¥í¥°¥é¥à
+ enc/ascii.c ASCII ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_jp.c EUC-JP ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_tw.c EUC-TW ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡)
+ enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/iso8859_1.c ISO-8859-1 (Latin-1)
+ enc/iso8859_2.c ISO-8859-2 (Latin-2)
+ enc/iso8859_3.c ISO-8859-3 (Latin-3)
+ enc/iso8859_4.c ISO-8859-4 (Latin-4)
+ enc/iso8859_5.c ISO-8859-5 (Cyrillic)
+ enc/iso8859_6.c ISO-8859-6 (Arabic)
+ enc/iso8859_7.c ISO-8859-7 (Greek)
+ enc/iso8859_8.c ISO-8859-8 (Hebrew)
+ enc/iso8859_9.c ISO-8859-9 (Latin-5 ¤Þ¤¿¤Ï Turkish)
+ enc/iso8859_10.c ISO-8859-10 (Latin-6 ¤Þ¤¿¤Ï Nordic)
+ enc/iso8859_11.c ISO-8859-11 (Thai)
+ enc/iso8859_13.c ISO-8859-13 (Latin-7 ¤Þ¤¿¤Ï Baltic Rim)
+ enc/iso8859_14.c ISO-8859-14 (Latin-8 ¤Þ¤¿¤Ï Celtic)
+ enc/iso8859_15.c ISO-8859-15 (Latin-9 ¤Þ¤¿¤Ï West European with Euro)
+ enc/iso8859_16.c ISO-8859-16
+ (Latin-10 ¤Þ¤¿¤Ï South-Eastern European with Euro)
+ enc/utf8.c UTF-8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf16_be.c UTF-16BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf16_le.c UTF-16LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf32_be.c UTF-32BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf32_le.c UTF-32LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/unicode.c Unicode¾ðÊó
+
+ win32/Makefile Win32ÍÑ Makefile (for VC++)
+ win32/config.h Win32ÍÑ config.h
+
+
+
+Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
+
+ + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£
+ + re_alloc_pattern() ¤¬Äɲ䵤줿¡£
+
+
+I'm thankful to Akinori MUSHA.
+
+
+¥¢¥É¥ì¥¹: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in
new file mode 100644
index 0000000..4a2fc28
--- /dev/null
+++ b/ext/mbstring/oniguruma/config.h.in
@@ -0,0 +1,108 @@
+/* config.h.in. Generated from configure.in by autoheader. */
+
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+ systems. This function is required for `alloca.c' support on those systems.
+ */
+#undef CRAY_STACKSEG_END
+
+/* Define to 1 if using `alloca.c'. */
+#undef C_ALLOCA
+
+/* Define to 1 if you have `alloca', as a function or macro. */
+#undef HAVE_ALLOCA
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+ */
+#undef HAVE_ALLOCA_H
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if compiler supports prototypes */
+#undef HAVE_PROTOTYPES
+
+/* Define if compiler supports stdarg prototypes */
+#undef HAVE_STDARG_PROTOTYPES
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `int', as computed by sizeof. */
+#undef SIZEOF_INT
+
+/* The size of a `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+/* The size of a `short', as computed by sizeof. */
+#undef SIZEOF_SHORT
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define if combination explosion check */
+#undef USE_COMBINATION_EXPLOSION_CHECK
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API
new file mode 100644
index 0000000..2f66287
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/API
@@ -0,0 +1,585 @@
+Oniguruma API Version 4.7.1 2007/07/04
+
+#include <oniguruma.h>
+
+
+# int onig_init(void)
+
+ Initialize library.
+
+ You don't have to call it explicitly, because it is called in onig_new().
+
+
+# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
+
+ Get error message string.
+ If this function is used for onig_new(),
+ don't call this after the pattern argument of onig_new() is freed.
+
+ normal return: error message string length
+
+ arguments
+ 1 err_buf: error message string buffer.
+ (required size: ONIG_MAX_ERROR_MESSAGE_LEN)
+ 2 err_code: error code returned by other API functions.
+ 3 err_info (optional): error info returned by onig_new().
+
+
+# void onig_set_warn_func(OnigWarnFunc func)
+
+ Set warning function.
+
+ WARNING:
+ '[', '-', ']' in character class without escape.
+ ']' in pattern without escape.
+
+ arguments
+ 1 func: function pointer. void (*func)(char* warning_message)
+
+
+# void onig_set_verb_warn_func(OnigWarnFunc func)
+
+ Set verbose warning function.
+
+ WARNING:
+ redundant nested repeat operator.
+
+ arguments
+ 1 func: function pointer. void (*func)(char* warning_message)
+
+
+# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* err_info)
+
+ Create a regex object.
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 reg: return regex object's address.
+ 2 pattern: regex pattern string.
+ 3 pattern_end: terminate address of pattern. (pattern + pattern length)
+ 4 option: compile time options.
+
+ ONIG_OPTION_NONE no option
+ ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
+ ONIG_OPTION_MULTILINE '.' match with newline
+ ONIG_OPTION_IGNORECASE ambiguity match on
+ ONIG_OPTION_EXTEND extended pattern form
+ ONIG_OPTION_FIND_LONGEST find longest match
+ ONIG_OPTION_FIND_NOT_EMPTY ignore empty match
+ ONIG_OPTION_NEGATE_SINGLELINE
+ clear ONIG_OPTION_SINGLELINE which is enabled on
+ ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA
+
+ ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured.
+ ONIG_OPTION_CAPTURE_GROUP named and no-named group captured.
+
+ 5 enc: character encoding.
+
+ ONIG_ENCODING_ASCII ASCII
+ ONIG_ENCODING_ISO_8859_1 ISO 8859-1
+ ONIG_ENCODING_ISO_8859_2 ISO 8859-2
+ ONIG_ENCODING_ISO_8859_3 ISO 8859-3
+ ONIG_ENCODING_ISO_8859_4 ISO 8859-4
+ ONIG_ENCODING_ISO_8859_5 ISO 8859-5
+ ONIG_ENCODING_ISO_8859_6 ISO 8859-6
+ ONIG_ENCODING_ISO_8859_7 ISO 8859-7
+ ONIG_ENCODING_ISO_8859_8 ISO 8859-8
+ ONIG_ENCODING_ISO_8859_9 ISO 8859-9
+ ONIG_ENCODING_ISO_8859_10 ISO 8859-10
+ ONIG_ENCODING_ISO_8859_11 ISO 8859-11
+ ONIG_ENCODING_ISO_8859_13 ISO 8859-13
+ ONIG_ENCODING_ISO_8859_14 ISO 8859-14
+ ONIG_ENCODING_ISO_8859_15 ISO 8859-15
+ ONIG_ENCODING_ISO_8859_16 ISO 8859-16
+ ONIG_ENCODING_UTF8 UTF-8
+ ONIG_ENCODING_UTF16_BE UTF-16BE
+ ONIG_ENCODING_UTF16_LE UTF-16LE
+ ONIG_ENCODING_UTF32_BE UTF-32BE
+ ONIG_ENCODING_UTF32_LE UTF-32LE
+ ONIG_ENCODING_EUC_JP EUC-JP
+ ONIG_ENCODING_EUC_TW EUC-TW
+ ONIG_ENCODING_EUC_KR EUC-KR
+ ONIG_ENCODING_EUC_CN EUC-CN
+ ONIG_ENCODING_SJIS Shift_JIS
+ ONIG_ENCODING_KOI8 KOI8
+ ONIG_ENCODING_KOI8_R KOI8-R
+ ONIG_ENCODING_BIG5 Big5
+ ONIG_ENCODING_GB18030 GB 18030
+
+ or any OnigEncodingType data address defined by user.
+
+ 6 syntax: address of pattern syntax definition.
+
+ ONIG_SYNTAX_ASIS plain text
+ ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
+ ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
+ ONIG_SYNTAX_EMACS Emacs
+ ONIG_SYNTAX_GREP grep
+ ONIG_SYNTAX_GNU_REGEX GNU regex
+ ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
+ ONIG_SYNTAX_PERL Perl
+ ONIG_SYNTAX_PERL_NG Perl + named group
+ ONIG_SYNTAX_RUBY Ruby
+ ONIG_SYNTAX_DEFAULT default (== Ruby)
+ onig_set_default_syntax()
+
+ or any OnigSyntaxType data address defined by user.
+
+ 7 err_info: address for returning optional error info.
+ Use this value as 3rd argument of onig_error_code_to_str().
+
+
+
+# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+
+ Create a regex object.
+ This function is a deluxe version of onig_new().
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 reg: return address of regex object.
+ 2 pattern: regex pattern string.
+ 3 pattern_end: terminate address of pattern. (pattern + pattern length)
+ 4 ci: compile time info.
+
+ ci->num_of_elements: number of elements in ci. (current version: 5)
+ ci->pattern_enc: pattern string character encoding.
+ ci->target_enc: target string character encoding.
+ ci->syntax: address of pattern syntax definition.
+ ci->option: compile time option.
+ ci->ambig_flag: character matching ambiguity bit flag for
+ ONIG_OPTION_IGNORECASE mode.
+
+ ONIGENC_AMBIGUOUS_MATCH_NONE: exact
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII
+ ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on
+ ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
+ onig_set_default_ambig_flag()
+
+ 5 einfo: address for returning optional error info.
+ Use this value as 3rd argument of onig_error_code_to_str().
+
+
+ Different character encoding combination is allowed for
+ the following cases only.
+
+ pattern_enc: ASCII, ISO_8859_1
+ target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
+
+ pattern_enc: UTF16_BE/LE
+ target_enc: UTF16_LE/BE
+
+ pattern_enc: UTF32_BE/LE
+ target_enc: UTF32_LE/BE
+
+
+# void onig_free(regex_t* reg)
+
+ Free memory used by regex object.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
+ const UChar* range, OnigRegion* region, OnigOptionType option)
+
+ Search string and return search result and matching region.
+
+ normal return: match position offset (i.e. p - str >= 0)
+ not found: ONIG_MISMATCH (< 0)
+
+ arguments
+ 1 reg: regex object
+ 2 str: target string
+ 3 end: terminate address of target string
+ 4 start: search start address of target string
+ 5 range: search terminate address of target string
+ in forward search (start <= searched string head < range)
+ in backward search (range <= searched string head <= start)
+ 6 region: address for return group match range info (NULL is allowed)
+ 7 option: search time option
+
+ ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
+ ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
+ ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
+
+
+# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
+ OnigRegion* region, OnigOptionType option)
+
+ Match string and return result and matching region.
+
+ normal return: match length (>= 0)
+ not match: ONIG_MISMATCH ( < 0)
+
+ arguments
+ 1 reg: regex object
+ 2 str: target string
+ 3 end: terminate address of target string
+ 4 at: match address of target string
+ 5 region: address for return group match range info (NULL is allowed)
+ 6 option: search time option
+
+ ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
+ ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
+ ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
+
+
+# OnigRegion* onig_region_new(void)
+
+ Create a region.
+
+
+# void onig_region_free(OnigRegion* region, int free_self)
+
+ Free memory used by region.
+
+ arguments
+ 1 region: target region
+ 2 free_self: [1: free all, 0: free memory used in region but not self]
+
+
+# void onig_region_copy(OnigRegion* to, OnigRegion* from)
+
+ Copy contents of region.
+
+ arguments
+ 1 to: target region
+ 2 from: source region
+
+
+# void onig_region_clear(OnigRegion* region)
+
+ Clear contents of region.
+
+ arguments
+ 1 region: target region
+
+
+# int onig_region_resize(OnigRegion* region, int n)
+
+ Resize group range area of region.
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 region: target region
+ 2 n: new size
+
+
+# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
+ int** num_list)
+
+ Return the group number list of the name.
+ Named subexp is defined by (?<name>....).
+
+ normal return: number of groups for the name.
+ (ex. /(?<x>..)(?<x>..)/ ==> 2)
+ name not found: -1
+
+ arguments
+ 1 reg: regex object.
+ 2 name: group name.
+ 3 name_end: terminate address of group name.
+ 4 num_list: return list of group number.
+
+
+# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
+ OnigRegion *region)
+
+ Return the group number corresponding to the named backref (\k<name>).
+ If two or more regions for the groups of the name are effective,
+ the greatest number in it is obtained.
+
+ normal return: group number.
+
+ arguments
+ 1 reg: regex object.
+ 2 name: group name.
+ 3 name_end: terminate address of group name.
+ 4 region: search/match result region.
+
+
+# int onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
+ void* arg)
+
+ Iterate function call for all names.
+
+ normal return: 0
+ error: func's return value.
+
+ arguments
+ 1 reg: regex object.
+ 2 func: callback function.
+ func(name, name_end, <number of groups>, <group number's list>,
+ reg, arg);
+ if func does not return 0, then iteration is stopped.
+ 3 arg: argument for func.
+
+
+# int onig_number_of_names(regex_t* reg)
+
+ Return the number of names defined in the pattern.
+ Multiple definitions of one name are counted as one.
+
+ arguments
+ 1 reg: regex object.
+
+
+# OnigEncoding onig_get_encoding(regex_t* reg)
+# OnigOptionType onig_get_options(regex_t* reg)
+# OnigAmbigType onig_get_ambig_flag(regex_t* reg)
+# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+
+ Return a value of the regex object.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_number_of_captures(regex_t* reg)
+
+ Return the number of capture groups in the pattern.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_number_of_capture_histories(regex_t* reg)
+
+ Return the number of capture histories defined in the pattern.
+
+ You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
+ is disabled in the pattern syntax.(disabled in the default syntax)
+
+ arguments
+ 1 reg: regex object.
+
+
+
+# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
+
+ Return the root node of capture history data tree.
+
+ This value is undefined if matching has failed.
+
+ arguments
+ 1 region: matching result.
+
+
+# int onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*func)(int,int,int,int,int,void*), void* arg)
+
+ Traverse and callback in capture history data tree.
+
+ normal return: 0
+ error: callback func's return value.
+
+ arguments
+ 1 region: match region data.
+ 2 at: callback position.
+
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
+ ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback.
+ ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children,
+ and at last callback again.
+
+ 3 func: callback function.
+ if func does not return 0, then traverse is stopped.
+
+ int func(int group, int beg, int end, int level, int at,
+ void* arg)
+
+ group: group number
+ beg: capture start position
+ end: capture end position
+ level: nest level (from 0)
+ at: callback position
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST
+ ONIG_TRAVERSE_CALLBACK_AT_LAST
+ arg: optional callback argument
+
+ 4 arg: optional callback argument.
+
+
+# int onig_noname_group_capture_is_active(regex_t* reg)
+
+ Return noname group capture activity.
+
+ active: 1
+ inactive: 0
+
+ arguments
+ 1 reg: regex object.
+
+ if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
+ --> inactive
+
+ if the regex pattern has a named group
+ and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
+ and option ONIG_OPTION_CAPTURE_GROUP == OFF
+ --> inactive
+
+ else --> active
+
+
+# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+
+ Return previous character head address.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ Return left-adjusted head address of a character.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ Return right-adjusted head address of a character.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+
+ Return number of characters in the string.
+
+
+# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+
+ Return number of bytes in the string.
+
+
+# int onig_set_default_syntax(OnigSyntaxType* syntax)
+
+ Set default syntax.
+
+ arguments
+ 1 syntax: address of pattern syntax definition.
+
+
+# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+
+ Copy syntax.
+
+ arguments
+ 1 to: destination address.
+ 2 from: source address.
+
+
+# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
+# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
+
+# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+
+ Get/Set elements of the syntax.
+
+ arguments
+ 1 syntax: syntax
+ 2 op, op2, behavior, options: value of element.
+
+
+# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+
+ Copy encoding.
+
+ arguments
+ 1 to: destination address.
+ 2 from: source address.
+
+
+# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+ OnigCodePoint code)
+
+ Set a variable meta character to the code point value.
+ Except for the escape character, this meta character specification
+ does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled
+ in the syntax. (It is not enabled in the built-in syntaxes.)
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 enc: target encoding
+ 2 what: specifies which meta character it is.
+
+ ONIG_META_CHAR_ESCAPE
+ ONIG_META_CHAR_ANYCHAR
+ ONIG_META_CHAR_ANYTIME
+ ONIG_META_CHAR_ZERO_OR_ONE_TIME
+ ONIG_META_CHAR_ONE_OR_MORE_TIME
+ ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+ 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
+
+
+# OnigAmbigType onig_get_default_ambig_flag()
+
+ Get default ambig flag.
+
+
+# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+
+ Set default ambig flag.
+
+ 1 ambig_flag: ambiguity flag
+
+
+# unsigned int onig_get_match_stack_limit_size(void)
+
+ Return the maximum size of the match stack.
+ (default: 0 == unlimited)
+
+
+# int onig_set_match_stack_limit_size(unsigned int size)
+
+ Set the maximum size of the match stack.
+ (size = 0: unlimited)
+
+ normal return: ONIG_NORMAL
+
+
+# int onig_end(void)
+
+ The use of this library is finished.
+
+ normal return: ONIG_NORMAL
+
+ It is not allowed to use regex objects which were created
+ before the onig_end() call.
+
+
+# const char* onig_version(void)
+
+ Return version string. (ex. "2.2.8")
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja
new file mode 100644
index 0000000..f2a8bd6
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/API.ja
@@ -0,0 +1,592 @@
+µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 4.7.1 2007/07/04
+
+#include <oniguruma.h>
+
+
+# int onig_init(void)
+
+ ¥é¥¤¥Ö¥é¥ê¤Î½é´ü²½
+
+ onig_new()¤ÎÃæ¤Ç¸Æ¤Ó½Ð¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤Î´Ø¿ô¤òÌÀ¼¨Åª¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤¯¤Æ¤â¤è¤¤¡£
+
+
+# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
+
+ ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò¼èÆÀ¤¹¤ë¡£
+
+ ¤³¤Î´Ø¿ô¤ò¡¢onig_new()¤Î·ë²Ì¤ËÂФ·¤Æ¸Æ¤Ó½Ð¤¹¾ì¹ç¤Ë¤Ï¡¢onig_new()¤Îpattern°ú¿ô¤ò
+ ¥á¥â¥ê²òÊü¤¹¤ë¤è¤ê¤âÁ°¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸Ê¸»úÎó¤Î¥Ð¥¤¥ÈĹ
+
+ °ú¿ô
+ 1 err_buf: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò³ÊǼ¤¹¤ëÎΰè
+ (ɬÍפʥµ¥¤¥º: ONIG_MAX_ERROR_MESSAGE_LEN)
+ 2 err_code: ¥¨¥é¡¼¥³¡¼¥É
+ 3 err_info (optional): onig_new()¤Îerr_info
+
+
+# void onig_set_warn_func(OnigWarnFunc func)
+
+ ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ ·Ù¹ð:
+ '[', '-', ']' in character class without escape.
+ ']' in pattern without escape.
+
+ °ú¿ô
+ 1 func: ·Ù¹ð´Ø¿ô void (*func)(char* warning_message)
+
+
+# void onig_set_verb_warn_func(OnigWarnFunc func)
+
+ ¾ÜºÙ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ ¾ÜºÙ·Ù¹ð:
+ redundant nested repeat operator.
+
+ °ú¿ô
+ 1 func: ¾ÜºÙ·Ù¹ð´Ø¿ô void (*func)(char* warning_message)
+
+
+# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* err_info)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+ 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó
+ 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length)
+ 4 option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NONE ¥ª¥×¥·¥ç¥ó¤Ê¤·
+ ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
+ ONIG_OPTION_MULTILINE '.'¤¬²þ¹Ô¤Ë¥Þ¥Ã¥Á¤¹¤ë
+ ONIG_OPTION_IGNORECASE Û£Ëæ¥Þ¥Ã¥Á ¥ª¥ó
+ ONIG_OPTION_EXTEND ¥Ñ¥¿¡¼¥ó³ÈÄ¥·Á¼°
+ ONIG_OPTION_FIND_LONGEST ºÇĹ¥Þ¥Ã¥Á
+ ONIG_OPTION_FIND_NOT_EMPTY ¶õ¥Þ¥Ã¥Á¤ò̵»ë
+ ONIG_OPTION_NEGATE_SINGLELINE
+ ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA¤Ç
+ ¥Ç¥Õ¥©¥ë¥È¤ÇÍ­¸ú¤ÊONIG_OPTION_SINGLELINE¤ò¥¯¥ê¥¢¤¹¤ë¡£
+
+ ONIG_OPTION_DONT_CAPTURE_GROUP ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Î¤ßÊá³Í
+ ONIG_OPTION_CAPTURE_GROUP ̾Á°Ìµ¤·Êá³Í¼°½¸¹ç¤âÊá³Í
+
+ 5 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+
+ ONIG_ENCODING_ASCII ASCII
+ ONIG_ENCODING_ISO_8859_1 ISO 8859-1
+ ONIG_ENCODING_ISO_8859_2 ISO 8859-2
+ ONIG_ENCODING_ISO_8859_3 ISO 8859-3
+ ONIG_ENCODING_ISO_8859_4 ISO 8859-4
+ ONIG_ENCODING_ISO_8859_5 ISO 8859-5
+ ONIG_ENCODING_ISO_8859_6 ISO 8859-6
+ ONIG_ENCODING_ISO_8859_7 ISO 8859-7
+ ONIG_ENCODING_ISO_8859_8 ISO 8859-8
+ ONIG_ENCODING_ISO_8859_9 ISO 8859-9
+ ONIG_ENCODING_ISO_8859_10 ISO 8859-10
+ ONIG_ENCODING_ISO_8859_11 ISO 8859-11
+ ONIG_ENCODING_ISO_8859_13 ISO 8859-13
+ ONIG_ENCODING_ISO_8859_14 ISO 8859-14
+ ONIG_ENCODING_ISO_8859_15 ISO 8859-15
+ ONIG_ENCODING_ISO_8859_16 ISO 8859-16
+ ONIG_ENCODING_UTF8 UTF-8
+ ONIG_ENCODING_UTF16_BE UTF-16BE
+ ONIG_ENCODING_UTF16_LE UTF-16LE
+ ONIG_ENCODING_UTF32_BE UTF-32BE
+ ONIG_ENCODING_UTF32_LE UTF-32LE
+ ONIG_ENCODING_EUC_JP EUC-JP
+ ONIG_ENCODING_EUC_TW EUC-TW
+ ONIG_ENCODING_EUC_KR EUC-KR
+ ONIG_ENCODING_EUC_CN EUC-CN
+ ONIG_ENCODING_SJIS Shift_JIS
+ ONIG_ENCODING_KOI8 KOI8
+ ONIG_ENCODING_KOI8_R KOI8-R
+ ONIG_ENCODING_BIG5 Big5
+ ONIG_ENCODING_GB18030 GB 18030
+
+ ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹
+
+ 6 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ
+
+ ONIG_SYNTAX_ASIS plain text
+ ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
+ ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
+ ONIG_SYNTAX_EMACS Emacs
+ ONIG_SYNTAX_GREP grep
+ ONIG_SYNTAX_GNU_REGEX GNU regex
+ ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
+ ONIG_SYNTAX_PERL Perl
+ ONIG_SYNTAX_PERL_NG Perl + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
+ ONIG_SYNTAX_RUBY Ruby
+ ONIG_SYNTAX_DEFAULT default (== Ruby)
+ onig_set_default_syntax()
+
+ ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigSyntaxType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹
+
+ 7 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹
+ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
+
+
+# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£
+ ¤³¤Î´Ø¿ô¤Ï¡¢onig_new()¤Î¥Ç¥é¥Ã¥¯¥¹ÈÇ¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+ 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó
+ 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length)
+ 4 ci: ¥³¥ó¥Ñ¥¤¥ë¾ðÊó
+
+ ci->num_of_elements: ci¤ÎÍ×ÁÇ¿ô (¸½ºß¤ÎÈǤǤÏ: 5)
+ ci->pattern_enc: ¥Ñ¥¿¡¼¥óʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ ci->target_enc: ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ ci->syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ
+ ci->option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó
+ ci->ambig_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î
+ ʸ»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥°
+
+ ONIGENC_AMBIGUOUS_MATCH_NONE: Û£ËæÌµ¤·
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCII¤ÎÂçʸ»ú¾®Ê¸»ú
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú
+ ONIGENC_AMBIGUOUS_MATCH_FULL: Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í­¸ú
+ ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
+ onig_set_default_ambig_flag()
+
+ 5 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹
+ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
+
+
+ °Û¤Ê¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ÎÁȤ߹ç¤ï¤»¤Ï¡¢°Ê²¼¤Î¾ì¹ç¤Ë¤Î¤ßµö¤µ¤ì¤ë¡£
+
+ pattern_enc: ASCII, ISO_8859_1
+ target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
+
+ pattern_enc: UTF16_BE/LE
+ target_enc: UTF16_LE/BE
+
+ pattern_enc: UTF32_BE/LE
+ target_enc: UTF32_LE/BE
+
+
+# void onig_free(regex_t* reg)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+
+# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
+ const UChar* range, OnigRegion* region, OnigOptionType option)
+
+ Àµµ¬É½¸½¤Çʸ»úÎó¤ò¸¡º÷¤·¡¢¸¡º÷·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á°ÌÃÖ (p - str >= 0)
+ ¸¡º÷¼ºÇÔ: ONIG_MISMATCH (< 0)
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 str: ¸¡º÷ÂоÝʸ»úÎó
+ 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹
+ 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹
+ Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ < range)
+ ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ <= start)
+ 6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë)
+ 7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë
+
+
+# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
+ OnigRegion* region, OnigOptionType option)
+
+ ʸ»úÎó¤Î»ØÄê°ÌÃ֤ǥޥåÁ¥ó¥°¤ò¹Ô¤¤¡¢·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á¤·¤¿¥Ð¥¤¥ÈĹ (>= 0)
+ not match: ONIG_MISMATCH ( < 0)
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 str: ¸¡º÷ÂоÝʸ»úÎó
+ 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 at: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷¥¢¥É¥ì¥¹
+ 5 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë)
+ 6 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë
+
+
+# OnigRegion* onig_region_new(void)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òºîÀ®¤¹¤ë¡£
+
+
+# void onig_region_free(OnigRegion* region, int free_self)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤Ç»ÈÍѤµ¤ì¤Æ¤¤¤ë¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊ󥪥֥¸¥§¥¯¥È
+ 2 free_self: [1: region¼«¿È¤ò´Þ¤á¤ÆÁ´¤Æ²òÊü, 0: region¼«¿È¤Ï²òÊü¤·¤Ê¤¤]
+
+
+# void onig_region_copy(OnigRegion* to, OnigRegion* from)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òÊ£À½¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝÎΰè
+ 2 from: ¸µÎΰè
+
+
+# void onig_region_clear(OnigRegion* region)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÃæÌ£¤ò¥¯¥ê¥¢¤¹¤ë¡£
+
+ °ú¿ô
+ 1 region: ÂоÝÎΰè
+
+
+# int onig_region_resize(OnigRegion* region, int n)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¿ô¤òÊѹ¹¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 region: ÂоÝÎΰè
+ 2 n: ¿·¤·¤¤¥µ¥¤¥º
+
+
+# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
+ int** num_list)
+
+ »ØÄꤷ¤¿Ì¾Á°¤ËÂФ¹¤ë̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤Î
+ ¥°¥ë¡¼¥×ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¡£
+ ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï¡¢(?<name>....)¤Ë¤è¤Ã¤ÆÄêµÁ¤Ç¤­¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: »ØÄꤵ¤ì¤¿Ì¾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¿ô
+ (Îã /(?<x>..)(?<x>..)/ ==> 2)
+ ̾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¤¬Â¸ºß¤·¤Ê¤¤: -1
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾
+ 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 num_list: ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+
+
+# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
+ OnigRegion *region)
+
+ »ØÄꤵ¤ì¤¿Ì¾Á°¤Î¸åÊý»²¾È(\k<name>)¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤ÎÈÖ¹æ¤òÊÖ¤¹¡£
+ ̾Á°¤ËÂФ·¤Æ¡¢Ê£¿ô¤Î¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤Ç¤¢¤ì¤Ð¡¢¤½¤ÎÃæ¤ÎºÇÂç¤ÎÈÖ¹æ¤òÊÖ¤¹¡£
+ ̾Á°¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç¤¬°ì¸Ä¤·¤«¤Ê¤¤¤È¤­¤Ë¤Ï¡¢Âбþ¤¹¤ë¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤«
+ ¤É¤¦¤«¤Ë´Ø·¸¤Ê¤¯¡¢¤½¤ÎÈÖ¹æ¤òÊÖ¤¹¡£(½¾¤Ã¤Æ¡¢region¤Ë¤ÏNULL¤òÅϤ·¤Æ¤â¤è¤¤¡£)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ÈÖ¹æ
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾
+ 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 region: search/match·ë²Ì¤Î¥Þ¥Ã¥ÁÎΰè
+
+
+# int onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
+ void* arg)
+
+ Á´¤Æ¤Î̾Á°¤ËÂФ·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¸Æ¤Ó½Ð¤·¤ò¼Â¹Ô¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0
+ ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô
+ func(name, name_end, <number of groups>, <group number's list>,
+ reg, arg);
+
+ func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î¥³¡¼¥ë¥Ð¥Ã¥¯¤Ï¹Ô¤Ê¤ï¤º¤Ë
+ ½ªÎ»¤¹¤ë¡£
+
+ 3 arg: func¤ËÂФ¹¤ëÄɲðú¿ô
+
+
+# int onig_number_of_names(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Ì¾Á°¤Î¿ô¤òÊÖ¤¹¡£
+ °ì¸Ä¤Î̾Á°¤Î¿½ÅÄêµÁ¤Ï°ì¸Ä¤È´ÇÐö¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# OnigEncoding onig_get_encoding(regex_t* reg)
+# OnigOptionType onig_get_options(regex_t* reg)
+# OnigAmbigType onig_get_ambig_flag(regex_t* reg)
+# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# int onig_number_of_captures(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³Í¥°¥ë¡¼¥×¤Î¿ô¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# int onig_number_of_capture_histories(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³ÍÍúÎò(?@...)¤Î¿ô¤òÊÖ¤¹¡£
+
+ »ÈÍѤ¹¤ëʸˡ¤ÇÊá³ÍÍúÎòµ¡Ç½¤¬Í­¸ú(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
+ ¤Ç¤Ê¤±¤ì¤Ð¡¢Êá³ÍÍúÎòµ¡Ç½¤Ï»ÈÍѤǤ­¤Ê¤¤¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
+
+ Êá³ÍÍúÎò¥Ç¡¼¥¿¤Î¥ë¡¼¥È¥Î¡¼¥É¤òÊÖ¤¹¡£
+
+ ¥Þ¥Ã¥Á¤¬¼ºÇÔ¤·¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢¤³¤ÎÃͤÏÉÔÄê¤Ç¤¢¤ë¡£
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè
+
+
+# int onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*func)(int,int,int,int,int,void*), void* arg)
+
+ Êá³ÍÍúÎò¥Ç¡¼¥¿ÌÚ¤ò½ä²ó¤·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0
+ ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè
+ 2 at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤ò¹Ô¤Ê¤¦¥¿¥¤¥ß¥ó¥°
+
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST:
+ ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó
+ ONIG_TRAVERSE_CALLBACK_AT_LAST:
+ »Ò¥Î¡¼¥É¤ò½ä²ó¤·¤Æ¡¢¥³¡¼¥ë¥Ð¥Ã¥¯
+ ONIG_TRAVERSE_CALLBACK_AT_BOTH:
+ ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó¡¢ºÇ¸å¤Ë¤â¤¦°ìÅÙ¥³¡¼¥ë¥Ð¥Ã¥¯
+
+ 3 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô
+ func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î½ä²ó¤Ï¹Ô¤Ê¤ï¤º¤Ë
+ ½ªÎ»¤¹¤ë¡£
+
+ int func(int group, int beg, int end, int level, int at,
+ void* arg)
+ group: ¥°¥ë¡¼¥×ÈÖ¹æ
+ beg: ¥Þ¥Ã¥Á³«»Ï°ÌÃÖ
+ end ¥Þ¥Ã¥Á½ªÎ»°ÌÃÖ
+ level: ¥Í¥¹¥È¥ì¥Ù¥ë (0¤«¤é)
+ at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤¬¸Æ¤Ó½Ð¤µ¤ì¤¿¥¿¥¤¥ß¥ó¥°
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST
+ ONIG_TRAVERSE_CALLBACK_AT_LAST
+ arg: Äɲðú¿ô
+
+ 4 arg; func¤ËÂФ¹¤ëÄɲðú¿ô
+
+
+# int onig_noname_group_capture_is_active(regex_t* reg)
+
+ ̾Á°¤Ê¤·¼°½¸¹ç¤ÎÊá³Íµ¡Ç½¤¬Í­¸ú¤«¤É¤¦¤«¤òÊÖ¤¹¡£
+
+ Í­¸ú: 1
+ ̵¸ú: 0
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+ ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_DONT_CAPTURE_GROUP¤¬ON --> ̵¸ú
+
+ ¥Ñ¥¿¡¼¥ó¤¬Ì¾Á°¤Ä¤­¼°½¸¹ç¤ò»ÈÍѤ·¤Æ¤¤¤ë
+ AND »ÈÍÑʸˡ¤Ç¡¢ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP¤¬ON
+ AND ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_CAPTURE_GROUP¤¬OFF
+ --> ̵¸ú
+
+ ¾åµ­°Ê³°¤Î¾ì¹ç --> Í­¸ú
+
+
+# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+
+ ʸ»ú°ì¸ÄʬÁ°¤Îʸ»úÎó°ÌÃÖ¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ëº¸Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ë±¦Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+
+ ʸ»úÎó¤Îʸ»ú¿ô¤òÊÖ¤¹¡£
+
+
+# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+
+ ʸ»úÎó¤Î¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
+
+
+# int onig_set_default_syntax(OnigSyntaxType* syntax)
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÀµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ °ú¿ô
+ 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ
+
+
+# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+
+ Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥³¥Ô¡¼¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝ
+ 2 from: ¸µ
+
+
+# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
+# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
+
+# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+
+ Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÎÍ×ÁǤò»²¾È/¼èÆÀ¤¹¤ë¡£
+
+ °ú¿ô
+ 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ
+ 2 op, op2, behavior, options: Í×ÁǤÎÃÍ
+
+
+# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+
+ ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò¥³¥Ô¡¼¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝ
+ 2 from: ¸µ
+
+
+# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+ OnigCodePoint code)
+
+ ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£
+ ONIG_SYN_OP_VARIABLE_META_CHARACTERS¤¬Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÇÍ­¸ú¤Ë
+ ¤Ê¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢¥¨¥¹¥±¡¼¥×ʸ»ú¤ò½ü¤¤¤Æ¡¢¤³¤³¤Ç»ØÄꤷ¤¿¥á¥¿Ê¸»ú¤Ï
+ µ¡Ç½¤·¤Ê¤¤¡£(Áȹþ¤ß¤Îʸˡ¤Ç¤ÏÍ­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤¡£)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 enc: ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê
+
+ ONIG_META_CHAR_ESCAPE
+ ONIG_META_CHAR_ANYCHAR
+ ONIG_META_CHAR_ANYTIME
+ ONIG_META_CHAR_ZERO_OR_ONE_TIME
+ ONIG_META_CHAR_ONE_OR_MORE_TIME
+ ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+ 3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR.
+
+
+# OnigAmbigType onig_get_default_ambig_flag()
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£
+
+
+# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ °ú¿ô
+ 1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°
+
+
+# unsigned int onig_get_match_stack_limit_size(void)
+
+ ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤòÊÖ¤¹¡£
+ (¥Ç¥Õ¥©¥ë¥È: 0 == ̵À©¸Â)
+
+
+# int onig_set_match_stack_limit_size(unsigned int size)
+
+ ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤò»ØÄꤹ¤ë¡£
+ (size = 0: ̵À©¸Â)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+
+# int onig_end(void)
+
+ ¥é¥¤¥Ö¥é¥ê¤Î»ÈÍѤò½ªÎ»¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ onig_init()¤òºÆÅٸƤӽФ·¤Æ¤â¡¢°ÊÁ°¤ËºîÀ®¤·¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ ¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¤Ç¤­¤Ê¤¤¡£
+
+
+# const char* onig_version(void)
+
+ ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8")
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ
new file mode 100644
index 0000000..dccf242
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/FAQ
@@ -0,0 +1,37 @@
+FAQ 2006/10/30
+
+1. Longest match
+
+ You can execute longest match by using ONIG_OPTION_FIND_LONGEST option
+ in onig_new().
+
+
+2. Thread safe
+
+ To make the library thread safe, either (A) or (B) must be done.
+
+ (A) Oniguruma Layer
+
+ Define the macros below in the NOT_RUBY section of oniguruma/regint.h.
+
+ USE_MULTI_THREAD_SYSTEM
+ THREAD_ATOMIC_START
+ THREAD_ATOMIC_END
+ THREAD_PASS
+
+ THREAD_SYSTEM_INIT
+ THREAD_SYSTEM_END
+
+
+ (B) Application Layer
+
+ Multiple threads must not simultaneously create new regexp
+ objects, re-compile objects, or free objects,
+ even if the objects involved are different.
+
+
+3. Mailing list
+
+ There is no mailing list about Oniguruma.
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja
new file mode 100644
index 0000000..5582765
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/FAQ.ja
@@ -0,0 +1,122 @@
+FAQ 2006/10/30
+
+1. ºÇĹ¥Þ¥Ã¥Á
+
+ onig_new()¤ÎÃæ¤Ç¡¢ONIG_OPTION_FIND_LONGEST¥ª¥×¥·¥ç¥ó
+ ¤ò»ÈÍѤ¹¤ì¤ÐºÇĹ¥Þ¥Ã¥Á¤Ë¤Ê¤ë¡£
+
+
+2. ¥¹¥ì¥Ã¥É¥»¡¼¥Õ
+
+ ¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢°Ê²¼¤Î(A)¤È(B)¤Î¤É¤Á¤é¤«¤ò¹Ô¤Ê¤¨¤Ð
+ ¤è¤¤¡£
+
+ (A) Oniguruma Layer
+
+ oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£
+
+ USE_MULTI_THREAD_SYSTEM
+ THREAD_ATOMIC_START
+ THREAD_ATOMIC_END
+ THREAD_PASS
+
+ ²¿¤é¤«¤Î½é´ü²½/½ªÎ»½èÍý¤¬É¬ÍפǤ¢¤ì¤Ð¡¢°Ê²¼¤Î¥Þ¥¯¥í¤ËÄêµÁ¤¹¤ë¡£
+ THREAD_SYSTEM_INIT
+ THREAD_SYSTEM_END
+
+
+ (B) Application Layer
+
+ Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢
+ ¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
+ ¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£
+
+ ¤â¤¦¾¯¤·¾Ü¤·¤¤ÀâÌÀ¤Ï¡¢¤³¤Î¥É¥­¥å¥á¥ó¥È¤ÎÃæ¤Î
+ "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£
+
+
+3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È
+
+ µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£
+
+//END
+
+
+
+¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­
+
+¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢¸ÄÊ̤Υ¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢
+Oniguruma¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢¤É¤Á¤é¤«¤òÁª¤Ö¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£
+(Oniguruma¤ò»ÈÍѤ¹¤ë¦¤ÇÂн褹¤ë¤«¡¢Oniguruma¤ËÂн褵¤»¤ë¤«
+¤É¤Á¤é¤«ÊÒÊý¤Ç¹Ô¤¦É¬Íפ¬¤¢¤ë¤È¤¤¤¦¤³¤È¤Ç¤¹¡£)
+
+¤³¤ì¤é¤ÎÊýË¡¤Ë¤Ä¤¤¤Æ¡¢°Ê²¼(A)¤È(B)¤ÇÀâÌÀ¤·¤Þ¤¹¡£
+
+¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ï¡¢¤½¤ì¤¾¤ì¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¤è¤Ã¤Æ¤â
+°Û¤Ê¤ê¤Þ¤¹¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤ÎÃæ¤Ç¶ñÂÎŪ¤Ë²¿¤ò¸Æ¤Ö¤Î¤«¤ò
+½ñ¤¯¤³¤È¤Ï̵Íý¤Ç¤¹¡£¼ÂºÝ¤Ë»ÈÍѤµ¤ì¤ë¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ç¡¢
+Âбþ¤¹¤ëµ¡Ç½¤Î¤â¤Î¤ò»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£
+
+(A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç
+
+oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç
+°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+USE_MULTI_THREAD_SYSTEM
+
+ ñ¤ËÍ­¸ú¤Ë¤¹¤ì¤Ð¤è¤¤¤Ç¤¹¡£
+
+THREAD_ATOMIC_START
+THREAD_ATOMIC_END
+
+ THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END¤Ç°Ï¤Þ¤ì¤¿
+ ¥×¥í¥°¥é¥à¤Î¥³¡¼¥ÉÉôʬ¤ò¤¢¤ë¥¹¥ì¥Ã¥É¤¬¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î
+ ¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Ê¤¤¤³¤È¤òÊݾ㤹¤ë¤â¤Î¤ËÄêµÁ
+ ¤·¤Æ¤¯¤À¤µ¤¤¡£
+ (̾Á°¤ÎÄ̤ꡢ°Ï¤Þ¤ì¤¿¥³¡¼¥ÉÉôʬ¤ò¥¹¥ì¥Ã¥É¥¢¥È¥ß¥Ã¥¯¤Ë
+ ¤¹¤ë¤È¤¤¤¦°ÕÌ£)
+
+THREAD_PASS
+
+ ¤³¤ì¤ò¼Â¹Ô¤·¤¿¥¹¥ì¥Ã¥É¤«¤é¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤ò°Ñ¾ù
+ ¤¹¤ë¤â¤Î¤ËÄêµÁ¤ò¤·¤Æ¤¯¤À¤µ¤¤¡£(ºÆ¥¹¥±¥¸¥å¡¼¥ë¤ò¸Æ¤Ó½Ð¤¹
+ ¤È¤¤¤¦°ÕÌ£)
+ Âбþ¤¹¤ëµ¡Ç½¤¬Á´¤¯¤Ê¤±¤ì¤Ð¡¢¶õÄêµÁ¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+(»²¹ÍÎã)
+Ruby¤Î¾ì¹ç¤òÎã¤Ë¤¹¤ë¤È¡¢
+Ruby¤Ï¼«Ê¬¼«¿È¤ÇÆÈ¼«¤Î¥¹¥ì¥Ã¥Éµ¡Ç½¤ò¼ÂÁõ¤·¤Æ¤¤¤Þ¤¹¡£
+¤½¤Îµ¡Ç½¤ò»ÈÍѤ¹¤ë¤È¡¢°Ê²¼¤Î¤è¤¦¤ËÄêµÁ¤¹¤ì¤Ð¤è¤¤¤³¤È¤Ë
+¤Ê¤ê¤Þ¤¹¡£
+
+#define USE_MULTI_THREAD_SYSTEM
+#define THREAD_SYSTEM_INIT
+#define THREAD_SYSTEM_END
+#define THREAD_ATOMIC_START DEFER_INTS
+#define THREAD_ATOMIC_END ENABLE_INTS
+#define THREAD_PASS rb_thread_schedule()
+
+Ruby¤Î¾ì¹ç¡¢¥¿¥¤¥Þ³ä¤ê¹þ¤ß¤ò»ÈÍѤ·¤Æ¡¢¥¹¥ì¥Ã¥É¤ÎÀÚ¤êÂØ¤¨¤ò
+¹Ô¤Ã¤Æ¤¤¤Þ¤¹¡£DEFER_INTS¤Ï³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é¤Î¼Â¹Ô¤ò°ì»þŪ¤Ë
+»ß¤á¤ë¤¿¤á¤Î¥Þ¥¯¥í¤Ç¤¹¡£ENABLE_INTS¥Þ¥¯¥í¤Ç³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é
+¤Î¼Â¹Ô¤òµö²Ä¤·¤Þ¤¹¡£
+¤³¤ì¤Ë¤è¤Ã¤Æ¡¢THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END
+¤Ç°Ï¤Þ¤ì¤¿Éôʬ¤Î¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Þ¤»¤ó¡£
+
+
+(B) ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç
+
+°Ê²¼¤òÊݾ㤹¤ë¤è¤¦¤Ë¡¢¥¹¥ì¥Ã¥É¤Î¼Â¹Ô¤òÀ©¸æ¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò
+¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£
+
+onig_new(), onig_new_deluxe(), onig_free()¤Î¤É¤ì¤«¤Î¸Æ¤Ó½Ð¤·¤ò¡¢
+Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤¬Æ±»þ¤Ë¼Â¹Ô¤¹¤ë¤³¤È¤òÈò¤±¤Æ¤¯¤À¤µ¤¤¡£Æ±»þ¤Ç¤Ê¤±¤ì¤ÐÊ̤ˤ«¤Þ¤¤¤Þ¤»¤ó¡£
+
+¤³¤ì¤Ï²¿¸ÎɬÍפʤΤ«¤È¤¤¤¦¤È¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë
+²áÄø¤Ç¡¢ÆâÉô¤Ç¶¦Ä̤˻²¾È¤¹¤ë¥Æ¡¼¥Ö¥ë¤¬¤¢¤ê¤Þ¤¹¡£
+¤³¤Î¥Æ¡¼¥Ö¥ë¤ËÂФ·¤Æ¤Î¥Ç¡¼¥¿ÅÐÏ¿½èÍý¤¬Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤Ç¾×ÆÍ¤·¤Æ
+°Û¾ï¤Ê¾õÂ֤ˤʤé¤Ê¤¤¤¿¤á¤ËɬÍפǤ¹¡£
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE
new file mode 100644
index 0000000..5a2783d
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/RE
@@ -0,0 +1,412 @@
+Oniguruma Regular Expressions Version 4.3.0 2006/08/17
+
+syntax: ONIG_SYNTAX_RUBY (default)
+
+
+1. Syntax elements
+
+ \ escape (enable or disable meta character meaning)
+ | alternation
+ (...) group
+ [...] character class
+
+
+2. Characters
+
+ \t horizontal tab (0x09)
+ \v vertical tab (0x0B)
+ \n newline (0x0A)
+ \r return (0x0D)
+ \b back space (0x08)
+ \f form feed (0x0C)
+ \a bell (0x07)
+ \e escape (0x1B)
+ \nnn octal char (encoded byte value)
+ \xHH hexadecimal char (encoded byte value)
+ \x{7HHHHHHH} wide hexadecimal char (character code point value)
+ \cx control char (character code point value)
+ \C-x control char (character code point value)
+ \M-x meta (x|0x80) (character code point value)
+ \M-\C-x meta control char (character code point value)
+
+ (* \b is effective in character class [...] only)
+
+
+3. Character types
+
+ . any character (except newline)
+
+ \w word character
+
+ Not Unicode:
+ alphanumeric, "_" and multibyte char.
+
+ Unicode:
+ General_Category -- (Letter|Mark|Number|Connector_Punctuation)
+
+ \W non word char
+
+ \s whitespace char
+
+ Not Unicode:
+ \t, \n, \v, \f, \r, \x20
+
+ Unicode:
+ 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ General_Category -- Line_Separator
+ -- Paragraph_Separator
+ -- Space_Separator
+
+ \S non whitespace char
+
+ \d decimal digit char
+
+ Unicode: General_Category -- Decimal_Number
+
+ \D non decimal digit char
+
+ \h hexadecimal digit char [0-9a-fA-F]
+
+ \H non hexadecimal digit char
+
+
+4. Quantifier
+
+ greedy
+
+ ? 1 or 0 times
+ * 0 or more times
+ + 1 or more times
+ {n,m} at least n but not more than m times
+ {n,} at least n times
+ {,n} at least 0 but not more than n times ({0,n})
+ {n} n times
+
+ reluctant
+
+ ?? 1 or 0 times
+ *? 0 or more times
+ +? 1 or more times
+ {n,m}? at least n but not more than m times
+ {n,}? at least n times
+ {,n}? at least 0 but not more than n times (== {0,n}?)
+
+ possessive (greedy and does not backtrack after repeated)
+
+ ?+ 1 or 0 times
+ *+ 0 or more times
+ ++ 1 or more times
+
+ ({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA only)
+
+ ex. /a*+/ === /(?>a*)/
+
+
+5. Anchors
+
+ ^ beginning of the line
+ $ end of the line
+ \b word boundary
+ \B not word boundary
+ \A beginning of string
+ \Z end of string, or before newline at the end
+ \z end of string
+ \G matching start position (*)
+
+ * Ruby Regexp:
+ previous end-of-match position
+ (This specification is not related to this library.)
+
+
+6. Character class
+
+ ^... negative class (lowest precedence operator)
+ x-y range from x to y
+ [...] set (character class in character class)
+ ..&&.. intersection (low precedence at the next of ^)
+
+ ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w]
+
+ * If you want to use '[', '-', ']' as a normal character
+ in a character class, you should escape these characters by '\'.
+
+
+ POSIX bracket ([:xxxxx:], negate [:^xxxxx:])
+
+ Not Unicode Case:
+
+ alnum alphabet or digit char
+ alpha alphabet
+ ascii code value: [0 - 127]
+ blank \t, \x20
+ cntrl
+ digit 0-9
+ graph include all of multibyte encoded characters
+ lower
+ print include all of multibyte encoded characters
+ punct
+ space \t, \n, \v, \f, \r, \x20
+ upper
+ xdigit 0-9, a-f, A-F
+
+
+ Unicode Case:
+
+ alnum Letter | Mark | Decimal_Number
+ alpha Letter | Mark
+ ascii 0000 - 007F
+ blank Space_Separator | 0009
+ cntrl Control | Format | Unassigned | Private_Use | Surrogate
+ digit Decimal_Number
+ graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+ lower Lowercase_Letter
+ print [[:graph:]] | [[:space:]]
+ punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+ Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+ Open_Punctuation
+ space Space_Separator | Line_Separator | Paragraph_Separator |
+ 0009 | 000A | 000B | 000C | 000D | 0085
+ upper Uppercase_Letter
+ xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ (0-9, a-f, A-F)
+
+
+7. Extended groups
+
+ (?#...) comment
+
+ (?imx-imx) option on/off
+ i: ignore case
+ m: multi-line (dot(.) match newline)
+ x: extended form
+ (?imx-imx:subexp) option on/off for subexp
+
+ (?:subexp) not captured group
+ (subexp) captured group
+
+ (?=subexp) look-ahead
+ (?!subexp) negative look-ahead
+ (?<=subexp) look-behind
+ (?<!subexp) negative look-behind
+
+ Subexp of look-behind must be fixed character length.
+ But different character length is allowed in top level
+ alternatives only.
+ ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
+
+ In negative-look-behind, captured group isn't allowed,
+ but shy group(?:) is allowed.
+
+ (?>subexp) atomic group
+ don't backtrack in subexp.
+
+ (?<name>subexp) define named group
+ (All characters of the name must be word characters,
+ and the first character must not be a digit or an upper case letter.)
+
+ Not only a name but a number is assigned like a captured
+ group.
+
+ Assigning the same name as two or more subexps is allowed.
+ In this case, a subexp call can not be performed although
+ the back reference is possible.
+
+
+8. Back reference
+
+ \n back reference by group number (n >= 1)
+ \k<name> back reference by group name
+
+ In the back reference by the multiplex definition name,
+ a subexp with a large number is referred to preferentially.
+ (When not matched, a group of the small number is referred to.)
+
+ * Back reference by group number is forbidden if a named group is defined
+ in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set.
+
+
+ back reference with nest level
+
+ (This function is disabled in Ruby 1.9.)
+
+ \k<name+n> n: 0, 1, 2, ...
+ \k<name-n> n: 0, 1, 2, ...
+
+ Designate a nest level relative to the back reference position.
+
+ ex 1.
+
+ /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
+
+ ex 2.
+
+ r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
+ (?<element> \g<stag> \g<content>* \g<etag> ){0}
+ (?<stag> < \g<name> \s* > ){0}
+ (?<name> [a-zA-Z_:]+ ){0}
+ (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
+ (?<etag> </ \k<name+1> >){0}
+ \g<element>
+ __REGEXP__
+
+ p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
+
+
+
+9. Subexp call ("Tanaka Akira special")
+
+ \g<name> call by group name
+ \g<n> call by group number (n >= 1)
+
+ * left-most recursive call is not allowed.
+ ex. (?<name>a|\g<name>b) => error
+ (?<name>a|b\g<name>c) => OK
+
+ * Call by group number is forbidden if a named group is defined in the pattern
+ and ONIG_OPTION_CAPTURE_GROUP is not set.
+
+ * If the option status of called group is different from calling position
+ then the group's option is effective.
+
+ ex. (?-i:\g<name>)(?i:(?<name>a)){0} match to "A"
+
+
+10. Captured group
+
+ Behavior of the no-named group (...) changes with the following conditions.
+ (But named group is not changed.)
+
+ case 1. /.../ (named group is not used, no option)
+
+ (...) is treated as a captured group.
+
+ case 2. /.../g (named group is not used, 'g' option)
+
+ (...) is treated as a no-captured group (?:...).
+
+ case 3. /..(?<name>..)../ (named group is used, no option)
+
+ (...) is treated as a no-captured group (?:...).
+ numbered-backref/call is not allowed.
+
+ case 4. /..(?<name>..)../G (named group is used, 'G' option)
+
+ (...) is treated as a captured group.
+ numbered-backref/call is allowed.
+
+ where
+ g: ONIG_OPTION_DONT_CAPTURE_GROUP
+ G: ONIG_OPTION_CAPTURE_GROUP
+
+ ('g' and 'G' options are argued in ruby-dev ML)
+
+ These options are not implemented in Ruby level.
+
+
+-----------------------------
+A-1. Syntax-dependent options
+
+ + ONIG_SYNTAX_RUBY
+ (?m): dot(.) match newline
+
+ + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA
+ (?s): dot(.) match newline
+ (?m): ^ match after newline, $ match before newline
+
+
+A-2. Original extensions
+
+ + hexadecimal digit char type \h, \H
+ + named group (?<name>...)
+ + named backref \k<name>
+ + subexp call \g<name>, \g<group-num>
+
+
+A-3. Features lacking compared with Perl 5.8.0
+
+ + [:word:]
+ + \N{name}
+ + \l,\u,\L,\U, \X, \C
+ + (?{code})
+ + (??{code})
+ + (?(condition)yes-pat|no-pat)
+
+ * \Q...\E
+ This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
+
+ * \p{property}, \P{property}
+ This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
+ Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+ Print, Punct, Space, Upper, XDigit, ASCII are supported.
+
+ Prefix 'Is' of property name is allowed in ONIG_SYNTAX_PERL only.
+ ex. \p{IsXDigit}.
+
+ Negation operator of property is supported in ONIG_SYNTAX_PERL only.
+ \p{^...}, \P{^...}
+
+
+A-4. Differences with Japanized GNU regex(version 0.12) of Ruby
+
+ + add hexadecimal digit char type (\h, \H)
+ + add look-behind
+ (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
+ + add possessive quantifier. ?+, *+, ++
+ + add operations in character class. [], &&
+ ('[' must be escaped to be used as an ordinary char in a character class.)
+ + add named group and subexp call.
+ + octal or hexadecimal number sequence can be treated as
+ a multibyte code char in character class if multibyte encoding
+ is specified.
+ (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ + allow the range of single byte char and multibyte char in character
+ class.
+ ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding.
+ + effect range of isolated option is to next ')'.
+ ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
+ + isolated option is not transparent to previous pattern.
+ ex. a(?i)* is a syntax error pattern.
+ + allow an incomplete left brace as an ordinary string.
+ ex. /{/, /({)/, /a{2,3/ etc...
+ + negative POSIX bracket [:^xxxx:] is supported.
+ + POSIX bracket [:ascii:] is added.
+ + repeat of look-ahead is not allowed.
+ ex. /(?=a)*/, /(?!b){5}/
+ + Ignore case option is effective to numbered character.
+ ex. /\x61/i =~ "A"
+ + In the range quantifier, the minimum count may be omitted.
+ /a{,n}/ == /a{0,n}/
+ Omitting both the minimum and the maximum count at the same time
+ is not allowed. (/a{,}/)
+ + /a{n}?/ is not a non-greedy operator.
+ /a{n}?/ == /(?:a{n})?/
+ + invalid back reference is checked and cause error.
+ /\1/, /(a)\2/
+ + Zero-length match in infinite repeat stops the repeat,
+ then changes of the capture group status are checked as stop condition.
+ /(?:()|())*\1\2/ =~ ""
+ /(?:\1a|())*/ =~ "a"
+
+
+A-5. Disabled functions by default syntax
+
+ + capture history
+
+ (?@...) and (?@<name>...)
+
+ ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
+
+ see sample/listcap.c file.
+
+
+A-6. Problems
+
+ + Invalid encoding byte sequence is not checked in UTF-8.
+
+ * Invalid first byte is treated as a character.
+ /./u =~ "\xa3"
+
+ * Incomplete byte sequence is not checked.
+ /\w+/ =~ "a\xf3\x8ec"
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/RE.ja b/ext/mbstring/oniguruma/doc/RE.ja
new file mode 100644
index 0000000..5168171
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/RE.ja
@@ -0,0 +1,424 @@
+鬼車 正規表現 Version 4.3.0 2006/08/17
+
+使用文法: ONIG_SYNTAX_RUBY (既定値)
+
+
+1. 基本要素
+
+ \ ÂàÈò½¤¾þ (¥¨¥¹¥±¡¼¥×) Àµµ¬É½¸½µ­¹æ¤ÎÍ­¸ú/̵¸ú¤ÎÀ©¸æ
+ | ÁªÂò»Ò
+ (...) ¼°½¸¹ç (¥°¥ë¡¼¥×)
+ [...] ʸ»ú½¸¹ç (ʸ»ú¥¯¥é¥¹)
+
+
+2. 文字
+
+ \t ¿åÊ¿¥¿¥Ö (0x09)
+ \v ¿âľ¥¿¥Ö (0x0B)
+ \n ²þ¹Ô (0x0A)
+ \r Éüµ¢ (0x0D)
+ \b ¸åÂà¶õÇò (0x08)
+ \f ²þÊÇ (0x0C)
+ \a ¾â (0x07)
+ \e ÂàÈò½¤¾þ (0x1B)
+ \nnn Ȭ¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô)
+ \xHH ½½Ï»¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô)
+ \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \cx À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \C-x À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \M-\C-x Ķ + À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+
+ ¢¨ \b¤Ï¡¢Ê¸»ú½¸¹çÆâ¤Ç¤Î¤ßÍ­¸ú
+
+
+3. 文字種
+
+ . Ǥ°Õʸ»ú (²þ¹Ô¤ò½ü¤¯)
+
+ \w ñ¸ì¹½À®Ê¸»ú
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+ ±Ñ¿ô»ú, "_" ¤ª¤è¤Ó ¿¥Ð¥¤¥Èʸ»ú¡£
+
+ Unicode¤Î¾ì¹ç:
+ General_Category -- (Letter|Mark|Number|Connector_Punctuation)
+
+ \W Èóñ¸ì¹½À®Ê¸»ú
+
+ \s ¶õÇòʸ»ú
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+ \t, \n, \v, \f, \r, \x20
+
+ Unicode¤Î¾ì¹ç:
+ 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ General_Category -- Line_Separator
+ -- Paragraph_Separator
+ -- Space_Separator
+
+ \S Èó¶õÇòʸ»ú
+
+ \d 10¿Ê¿ô»ú
+
+ Unicode¤Î¾ì¹ç: General_Category -- Decimal_Number
+
+ \D Èó10¿Ê¿ô»ú
+
+ \h 16¿Ê¿ô»ú [0-9a-fA-F]
+
+ \H Èó16¿Ê¿ô»ú
+
+
+
+4. 量指定子
+
+ ÍßÄ¥¤ê
+
+ ? °ì²ó¤Þ¤¿¤ÏÎí²ó
+ * Îí²ó°Ê¾å
+ + °ì²ó°Ê¾å
+ {n,m} n²ó°Ê¾åm²ó°Ê²¼
+ {n,} n²ó°Ê¾å
+ {,n} Îí²ó°Ê¾ån²ó°Ê²¼ ({0,n})
+ {n} n²ó
+
+ ̵Íß
+
+ ?? °ì²ó¤Þ¤¿¤ÏÎí²ó
+ *? Îí²ó°Ê¾å
+ +? °ì²ó°Ê¾å
+ {n,m}? n²ó°Ê¾åm²ó°Ê²¼
+ {n,}? n²ó°Ê¾å
+ {,n}? Îí²ó°Ê¾ån²ó°Ê²¼ (== {0,n}?)
+
+ ¶¯Íß (ÍßÄ¥¤ê¤Ç¡¢·«¤êÊÖ¤·¤ËÀ®¸ù¤·¤¿¸å¤Ï²ó¿ô¤ò¸º¤é¤¹¤è¤¦¤Ê¸åÂàºÆ»î¹Ô¤ò¤·¤Ê¤¤)
+
+ ?+ °ì²ó¤Þ¤¿¤ÏÎí²ó
+ *+ Îí²ó°Ê¾å
+ ++ °ì²ó°Ê¾å
+
+ ({n,m}+, {n,}+, {n}+ ¤Ï¡¢ONIG_SYNTAX_JAVA¤Ç¤Î¤ß¶¯ÍߤʻØÄê»Ò)
+
+ Îã. /a*+/ === /(?>a*)/
+
+
+5. 錨
+
+ ^ ¹ÔƬ
+ $ ¹ÔËö
+ \b ñ¸ì¶­³¦
+ \B Èóñ¸ì¶­³¦
+ \A ʸ»úÎóÀèÆ¬
+ \Z ʸ»úÎóËöÈø¡¢¤Þ¤¿¤Ïʸ»úÎóËöÈø¤Î²þ¹Ô¤ÎľÁ°
+ \z ʸ»úÎóËöÈø
+ \G ¾È¹ç³«»Ï°ÌÃÖ(*)
+
+ * Ruby Regexp:
+ Á°²ó¾È¹çÀ®¸ùËöÈø°ÌÃÖ
+ (¤³¤Î»ÅÍͤÏRuby¤Î¼ÂÁõ¤Ë´Ø¤¹¤ë¤â¤Î¤Ç¤¢¤ê¡¢
+ Àµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤È¤Ï̵´Ø·¸)
+
+
+6. 文字集合
+
+ ^... ÈÝÄê (ºÇÄãÍ¥ÀèÅٱ黻»Ò)
+ x-y ÈÏ°Ï (x¤«¤éy¤Þ¤Ç)
+ [...] ½¸¹ç (ʸ»ú½¸¹çÆâʸ»ú½¸¹ç)
+ ..&&.. Àѱ黻 (^¤Î¼¡¤ËÍ¥ÀèÅÙ¤¬Ä㤤±é»»»Ò)
+
+ Îã. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
+
+ ¢¨ '[', '-', ']'¤ò¡¢Ê¸»ú½¸¹çÆâ¤ÇÄ̾ïʸ»ú¤Î°ÕÌ£¤Ç»ÈÍѤ·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¤³¤ì¤é¤Îʸ»ú¤ò'\'¤ÇÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+
+
+ POSIX¥Ö¥é¥±¥Ã¥È ([:xxxxx:], ÈÝÄê [:^xxxxx:])
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+
+ alnum ±Ñ¿ô»ú
+ alpha 񥯣
+ ascii 0 - 127
+ blank \t, \x20
+ cntrl
+ digit 0-9
+ graph ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+ lower
+ print ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+ punct
+ space \t, \n, \v, \f, \r, \x20
+ upper
+ xdigit 0-9, a-f, A-F
+
+ Unicode¤Î¾ì¹ç:
+
+ alnum Letter | Mark | Decimal_Number
+ alpha Letter | Mark
+ ascii 0000 - 007F
+ blank Space_Separator | 0009
+ cntrl Control | Format | Unassigned | Private_Use | Surrogate
+ digit Decimal_Number
+ graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+ lower Lowercase_Letter
+ print [[:graph:]] | [[:space:]]
+ punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+ Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+ Open_Punctuation
+ space Space_Separator | Line_Separator | Paragraph_Separator |
+ 0009 | 000A | 000B | 000C | 000D | 0085
+ upper Uppercase_Letter
+ xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ (0-9, a-f, A-F)
+
+
+7. 拡張式集合
+
+ (?#...) Ãí¼á
+ (?imx-imx) ¸ÉΩ¥ª¥×¥·¥ç¥ó
+ i: Âçʸ»ú¾®Ê¸»ú¾È¹ç
+ m: Ê£¿ô¹Ô
+ x: ³ÈÄ¥·Á¼°
+ (?imx-imx:¼°) ¼°¥ª¥×¥·¥ç¥ó
+
+ (¼°) Êá³Í¼°½¸¹ç
+ (?:¼°) ÈóÊá³Í¼°½¸¹ç
+
+ (?=¼°) ÀèÆÉ¤ß
+ (?!¼°) ÈÝÄêÀèÆÉ¤ß
+ (?<=¼°) Ìá¤êÆÉ¤ß
+ (?<!¼°) ÈÝÄêÌá¤êÆÉ¤ß
+
+ Ìá¤êÆÉ¤ß¤Î¼°¤Ï¸ÇÄêʸ»úĹ¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+ ¤·¤«¤·¡¢ºÇ¾å°Ì¤ÎÁªÂò»Ò¤À¤±¤Ï°Û¤Ê¤Ã¤¿Ê¸»úŤ¬µö¤µ¤ì¤ë¡£
+ Îã. (?<=a|bc) ¤Ïµö²Ä. (?<=aaa(?:b|cd)) ¤ÏÉÔµö²Ä
+
+ ÈÝÄêÌá¤êÆÉ¤ß¤Ç¤Ï¡¢Êá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤Ê¤¤¤¬¡¢
+ ÈóÊá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤ë¡£
+
+ (?>¼°) ¸¶»ÒŪ¼°½¸¹ç
+ ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤­¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤
+
+ (?<name>¼°) ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
+ ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£
+ (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï
+ ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£)
+
+ ̾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+ ¤Î¤È¤­¤Ï¡¢Ì¾Á°¤ò»È¤ï¤Ê¤¤¤ÇÈÖ¹æ¤Ç¤â»²¾È¤Ç¤­¤ë¡£
+
+ Ê£¿ô¤Î¼°½¸¹ç¤ËƱ¤¸Ì¾Á°¤òÍ¿¤¨¤ë¤³¤È¤Ïµö¤µ¤ì¤Æ¤¤¤ë¡£
+ ¤³¤Î¾ì¹ç¤Ë¤Ï¡¢¤³¤Î̾Á°¤ò»ÈÍѤ·¤¿¸åÊý»²¾È¤Ï²Äǽ¤Ç¤¢¤ë¤¬¡¢
+ Éôʬ¼°¸Æ½Ð¤·¤Ï¤Ç¤­¤Ê¤¤¡£
+
+
+8. 後方参照
+
+ \n ÈÖ¹æ»ØÄ껲¾È (n >= 1)
+ \k<name> ̾Á°»ØÄ껲¾È
+
+ ̾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢
+ ÈÖ¹æ¤ÎÂ礭¤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£
+ (¥Þ¥Ã¥Á¤·¤Ê¤¤¤È¤­¤Ë¤ÏÈÖ¹æ¤Î¾®¤µ¤¤¼°½¸¹ç¤¬»²¾È¤µ¤ì¤ë)
+
+ ¢¨ ÈÖ¹æ»ØÄ껲¾È¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢
+ ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¶Ø»ß¤µ¤ì¤ë¡£(10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+
+
+ ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤­¸åÊý»²¾È
+
+ ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£
+
+ \k<name+n> n: 0, 1, 2, ...
+ \k<name-n> n: 0, 1, 2, ...
+
+ ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î
+ Êá³ÍÃͤò»²¾È¤¹¤ë¡£
+
+ Îã-1.
+
+ /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
+
+ Îã-2.
+
+ r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
+ (?<element> \g<stag> \g<content>* \g<etag> ){0}
+ (?<stag> < \g<name> \s* > ){0}
+ (?<name> [a-zA-Z_:]+ ){0}
+ (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
+ (?<etag> </ \k<name+1> >){0}
+ \g<element>
+ __REGEXP__
+
+ p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
+
+
+
+9. 部分式呼出し ("田中哲スペシャル")
+
+ \g<name> ̾Á°»ØÄê¸Æ½Ð¤·
+ \g<n> ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1)
+
+ ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£
+ Îã. (?<name>a|\g<name>b) => error
+ (?<name>a|b\g<name>c) => OK
+
+ ¢¨ ÈÖ¹æ»ØÄê¸Æ½Ð¤·¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢
+ ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¶Ø»ß¤µ¤ì¤ë¡£ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+
+ ¢¨ ¸Æ¤Ó½Ð¤µ¤ì¤¿¼°½¸¹ç¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬¸Æ½Ð¤·Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂ֤ȰۤʤäƤ¤¤ë
+ ¤È¤­¡¢¸Æ¤Ó½Ð¤µ¤ì¤¿Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬Í­¸ú¤Ç¤¢¤ë¡£
+
+ Îã. (?-i:\g<name>)(?i:(?<name>a)){0} ¤Ï "A" ¤Ë¾È¹çÀ®¸ù¤¹¤ë¡£
+
+
+10. 捕獲式集合
+
+ Êá³Í¼°½¸¹ç(...)¤Ï¡¢°Ê²¼¤Î¾ò·ï¤Ë±þ¤¸¤Æ¿¶Éñ¤¬ÊѲ½¤¹¤ë¡£
+ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÊѲ½¤·¤Ê¤¤)
+
+ case 1. /.../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·)
+
+ (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+
+ case 2. /.../g (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'g'¤ò»ØÄê)
+
+ (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+
+ case 3. /..(?<name>..)../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·)
+
+ (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤ÏÉÔµö²Ä¡£
+
+ case 4. /..(?<name>..)../G (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'G'¤ò»ØÄê)
+
+ (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤Ïµö²Ä¡£
+
+ ⤷
+ g: ONIG_OPTION_DONT_CAPTURE_GROUP
+ G: ONIG_OPTION_CAPTURE_GROUP
+ ('g'¤È'G'¥ª¥×¥·¥ç¥ó¤Ï¡¢ruby-dev ML¤ÇµÄÏÀ¤µ¤ì¤¿¡£)
+
+ ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢
+ ̾Á°ÉÕ¤­Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À­¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦
+ ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£
+ ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£
+
+
+-----------------------------
+補記 1. 文法依存オプション
+
+ + ONIG_SYNTAX_RUBY
+ (?m): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù
+
+ + ONIG_SYNTAX_PERL ¤È ONIG_SYNTAX_JAVA
+ (?s): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù
+ (?m): ^ ¤Ï²þ¹Ô¤Îľ¸å¤Ë¾È¹ç¤¹¤ë¡¢$ ¤Ï²þ¹Ô¤ÎľÁ°¤Ë¾È¹ç¤¹¤ë
+
+
+補記 2. 独自拡張機能
+
+ + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú \h, \H
+ + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç (?<name>...)
+ + ̾Á°»ØÄê¸åÊý»²¾È \k<name>
+ + Éôʬ¼°¸Æ½Ð¤· \g<name>, \g<group-num>
+
+
+補記 3. Perl 5.8.0と比較して存在しない機能
+
+ + [:word:]
+ + \N{name}
+ + \l,\u,\L,\U, \X, \C
+ + (?{code})
+ + (??{code})
+ + (?(condition)yes-pat|no-pat)
+
+ * \Q...\E
+ ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
+
+ * \p{property}, \P{property}
+ ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
+ Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+ Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤­¤ë¡£
+
+ ÆÃÀ­Ì¾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß
+ µö¤µ¤ì¤Æ¤¤¤ë¡£
+ ex. \p{IsXDigit}.
+
+ ÆÃÀ­¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£
+ \p{^...}, \P{^...}
+
+
+補記 4. Rubyの日本語化 GNU regex(version 0.12)との違い
+
+ + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà (\h, \H)
+ + Ìá¤êÆÉ¤ßµ¡Ç½¤òÄɲÃ
+ + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà (?+, *+, ++)
+ + ʸ»ú½¸¹ç¤ÎÃæ¤Î±é»»»Ò¤òÄɲà ([...], &&)
+ ('[' ¤Ï¡¢Ê¸»ú½¸¹ç¤ÎÃæ¤ÇÄ̾ï¤Îʸ»ú¤È¤·¤Æ»ÈÍѤ¹¤ë¤È¤­¤Ë¤Ï
+ ÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤)
+ + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤È¡¢Éôʬ¼°¸Æ½Ð¤·µ¡Ç½ÄɲÃ
+ + ¿¥Ð¥¤¥Èʸ»ú¥³¡¼¥É¤¬»ØÄꤵ¤ì¤Æ¤¤¤ë¤È¤­¡¢
+ ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹ç¤Çɽ¸½¤µ¤ì¤¿
+ °ì¸Ä¤Îʸ»ú¤È²ò¼á¤µ¤ì¤ë
+ (Îã. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ + ʸ»ú½¸¹ç¤ÎÃæ¤Ç¡¢°ì¥Ð¥¤¥Èʸ»ú¤È¿¥Ð¥¤¥Èʸ»ú¤ÎÈϰϻØÄê¤Ïµö¤µ¤ì¤ë¡£
+ ex. /[a-¤¢]/
+ + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤ÎÍ­¸úÈϰϤϡ¢¤½¤Î¸ÉΩ¥ª¥×¥·¥ç¥ó¤ò´Þ¤ó¤Ç¤¤¤ë¼°½¸¹ç¤Î
+ ½ª¤ï¤ê¤Þ¤Ç¤Ç¤¢¤ë
+ Îã. (?:(?i)a|b) ¤Ï (?:(?i:a|b)) ¤È²ò¼á¤µ¤ì¤ë¡¢(?:(?i:a)|b)¤Ç¤Ï¤Ê¤¤
+ + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤Ï¤½¤ÎÁ°¤Î¼°¤ËÂФ·¤ÆÆ©²áŪ¤Ç¤Ï¤Ê¤¤
+ Îã. /a(?i)*/ ¤Ïʸˡ¥¨¥é¡¼¤È¤Ê¤ë
+ + ÉÔ´°Á´¤Ê·«¤êÊÖ¤·ÈϰϻØÄê»Ò¤ÏÄ̾ï¤Îʸ»úÎó¤È¤·¤Æµö²Ä¤µ¤ì¤ë
+ Îã. /{/, /({)/, /a{2,3/
+ + ÈÝÄêŪPOSIX¥Ö¥é¥±¥Ã¥È [:^xxxx:] ¤òÄɲÃ
+ + POSIX¥Ö¥é¥±¥Ã¥È [:ascii:] ¤òÄɲÃ
+ + ÀèÆÉ¤ß¤Î·«¤êÊÖ¤·¤ÏÉÔµö²Ä
+ Îã. /(?=a)*/, /(?!b){5}/
+ + ¿ôÃͤǻØÄꤵ¤ì¤¿Ê¸»ú¤ËÂФ·¤Æ¤â¡¢Âçʸ»ú¾®Ê¸»ú¾È¹ç¥ª¥×¥·¥ç¥ó¤ÏÍ­¸ú
+ Îã. /\x61/i =~ "A"
+ + ·«¤êÊÖ¤·²ó¿ô»ØÄê¤Ç¡¢ºÇÄã²ó¿ô¤Î¾Êά(0²ó)¤¬¤Ç¤­¤ë
+ /a{,n}/ == /a{0,n}/
+ ºÇÄã²ó¿ô¤ÈºÇÂç²ó¿ô¤ÎƱ»þ¾Êά¤Ïµö¤µ¤ì¤Ê¤¤¡£(/a{,}/)
+ + /a{n}?/¤Ï̵Íߤʱ黻»Ò¤Ç¤Ï¤Ê¤¤¡£
+ /a{n}?/ == /(?:a{n})?/
+ + ̵¸ú¤Ê¸åÊý»²¾È¤ò¥Á¥§¥Ã¥¯¤·¤Æ¥¨¥é¡¼¤Ë¤¹¤ë¡£
+ /\1/, /(a)\2/
+ + ̵¸Â·«¤êÊÖ¤·¤ÎÃæ¤Ç¡¢Ä¹¤µÎí¤Ç¤Î¾È¹çÀ®¸ù¤Ï·«¤êÊÖ¤·¤òÃæÃǤµ¤»¤ë¤¬¡¢
+ ¤³¤Î¤È¤­¡¢ÃæÃǤ¹¤Ù¤­¤«¤É¤¦¤«¤ÎȽÄê¤È¤·¤Æ¡¢Êá³Í¼°½¸¹ç¤ÎÊá³Í¾õÂÖ¤Î
+ ÊѲ½¤Þ¤Ç¹Íθ¤·¤Æ¤¤¤ë
+ /(?:()|())*\1\2/ =~ ""
+ /(?:\1a|())*/ =~ "a"
+
+
+
+補記 5. 実装されているが、既定値では有効にしていない機能
+
+ + Êá³ÍÍúÎò»²¾È
+
+ (?@...) ¤È (?@<name>...)
+
+ Îã. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
+
+ »ÈÍÑÊýË¡¤Ï¡¢sample/listcap.c¤ò»²¾È
+
+ Í­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤Íýͳ¤Ï¡¢¤É¤ÎÄøÅÙÌò¤ËΩ¤Ä¤«¤Ï¤Ã¤­¤ê¤·¤Ê¤¤¤¿¤á¡£
+
+
+補記 6. 問題点
+
+ + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£
+
+ * ÀèÆ¬¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹
+ /./u =~ "\xa3"
+
+ * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤
+ /\w+/ =~ "a\xf3\x8ec"
+
+ ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£
+
+終り
diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c
new file mode 100644
index 0000000..64be21d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/ascii.c
@@ -0,0 +1,67 @@
+/**********************************************************************
+ ascii.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static int
+ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) /* ctype membership test used by the US-ASCII encoding */
+{
+ if (code < 128) /* 7-bit codes are classified by the ASCII ctype macro */
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else /* codes 128 and above are reported as members of no ctype */
+ return FALSE;
+}
+
+OnigEncodingType OnigEncodingASCII = {
+ onigenc_single_byte_mbc_enc_len,
+ "US-ASCII", /* name */
+ 1, /* max byte length */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ ascii_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
new file mode 100644
index 0000000..8679266
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -0,0 +1,168 @@
+/**********************************************************************
+ big5.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_BIG5[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+big5_mbc_enc_len(const UChar* p)
+{
+ return EncLen_BIG5[*p]; /* byte length of the character, keyed by its first byte */
+}
+
+static OnigCodePoint
+big5_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); /* forwards to the encoding-parameterized helper, passing Big5 */
+}
+
+static int
+big5_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); /* forwards to the encoding-parameterized helper, passing Big5 */
+}
+
+static int
+big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag, /* forwards all arguments unchanged, adding the Big5 encoding */
+ pp, end, lower);
+}
+
+static int
+big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); /* forwards all arguments unchanged, adding the Big5 encoding */
+}
+
+static int
+big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); /* forwards to the encoding-parameterized helper, passing Big5 */
+}
+
+static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+};
+
+#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) /* true when the length table marks byte as starting a multibyte character */
+#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] /* non-zero when the table marks byte as a possible trail byte */
+
+static UChar*
+big5_left_adjust_char_head(const UChar* start, const UChar* s) /* returns a character-head position at or before s, never before start */
+{
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* at or before the buffer start: nothing to adjust */
+ p = s;
+
+ if (BIG5_ISMB_TRAIL(*p)) { /* *s may be a trail byte, so its head may lie to the left */
+ while (p > start) { /* scan left while the preceding bytes can be lead bytes */
+ if (! BIG5_ISMB_FIRST(*--p)) {
+ p++; /* p landed on a non-lead byte; step forward again */
+ break;
+ }
+ }
+ }
+ len = enc_len(ONIG_ENCODING_BIG5, p);
+ if (p + len > s) return (UChar* )p; /* the character starting at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by an even number of bytes, never passing s */
+}
+
+static int
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end) /* end is not used */
+{
+ const UChar c = *s;
+
+ return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE); /* TRUE only when *s cannot be a Big5 trail byte */
+}
+
+OnigEncodingType OnigEncodingBIG5 = {
+ big5_mbc_enc_len,
+ "Big5", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ big5_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ big5_code_to_mbc,
+ big5_mbc_to_normalize,
+ big5_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ big5_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ big5_left_adjust_char_head,
+ big5_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c
new file mode 100644
index 0000000..71c81ee
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_jp.c
@@ -0,0 +1,228 @@
+/**********************************************************************
+ euc_jp.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) /* true when byte c lies outside 0xa1..0xfe (the UChar cast wraps smaller values) */
+
+static const int EncLen_EUCJP[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+eucjp_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCJP[*p]; /* byte length of the character, keyed by its first byte */
+}
+
+static OnigCodePoint
+eucjp_mbc_to_code(const UChar* p, const UChar* end) /* packs the bytes of the character at p into one code value */
+{
+ int c, i, len;
+ OnigCodePoint n;
+
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ n = (OnigCodePoint )*p++;
+ if (len == 1) return n; /* single-byte character: the byte itself is the code */
+
+ for (i = 1; i < len; i++) {
+ if (p >= end) break; /* input ends early: return what has been accumulated so far */
+ c = *p++;
+ n <<= 8; n += c; /* first byte ends up most significant */
+ }
+ return n;
+}
+
+static int
+eucjp_code_to_mbclen(OnigCodePoint code) /* encoded byte length of code; 0 when no case applies */
+{
+ if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+ else if ((code & 0xff0000) != 0) return 3; /* bits 16-23 set: three-byte form */
+ else if ((code & 0xff00) != 0) return 2; /* bits 8-15 set: two-byte form */
+ else return 0; /* not ASCII and none of bits 8-23 set */
+}
+
+#if 0
+static int
+eucjp_code_to_mbc_first(OnigCodePoint code)
+{
+ int first;
+
+ if ((code & 0xff0000) != 0) {
+ first = (code >> 16) & 0xff;
+ }
+ else if ((code & 0xff00) != 0) {
+ first = (code >> 8) & 0xff;
+ }
+ else {
+ return (int )code;
+ }
+ return first;
+}
+#endif
+
+static int
+eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) /* writes code into buf as bytes; returns the byte count or an error code */
+{
+ UChar *p = buf;
+
+ if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); /* written only when bits 16-23 are non-zero */
+ if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); /* written only when bits 8-15 are non-zero */
+ *p++ = (UChar )(code & 0xff); /* the low byte is always written */
+
+#if 1
+ if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) /* length implied by the first byte must equal the bytes written */
+ return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+ return p - buf;
+}
+
+static int
+eucjp_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower) /* copies one character from *pp into lower and advances *pp; end is not used */
+{
+ int len;
+ const UChar* p = *pp;
+
+ if (ONIGENC_IS_MBC_ASCII(p)) {
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { /* ASCII case folding requested */
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p; /* folding not requested: copy the byte unchanged */
+ }
+
+ (*pp)++;
+ return 1;
+ }
+ else {
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ if (lower != p) { /* skip the copy when the output buffer is the input itself */
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++; /* non-ASCII characters are copied byte for byte */
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end); /* forwards all arguments unchanged, adding the EUC-JP encoding */
+}
+
+static int
+eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) /* ctype membership test for EUC-JP code values */
+{
+ if (code < 128) /* 7-bit codes are classified by the ASCII ctype macro */
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { /* only WORD, GRAPH and PRINT can contain codes >= 128 */
+ return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE); /* TRUE when the code has a multibyte length */
+ }
+ }
+
+ return FALSE; /* code >= 128 tested against any other ctype */
+}
+
+static UChar*
+eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* In this encoding
+ mb-trail bytes don't mix with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* at or before the buffer start: nothing to adjust */
+ p = s;
+
+ while (!eucjp_islead(*p) && p > start) p--; /* back up over bytes in 0xa1..0xfe until a byte outside that range, or start */
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ if (p + len > s) return (UChar* )p; /* the character starting at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by an even number of bytes, never passing s */
+}
+
+static int
+eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) /* end is not used */
+{
+ const UChar c = *s;
+ if (c <= 0x7e || c == 0x8e || c == 0x8f) /* bytes up to 0x7e, or the two bytes EncLen_EUCJP maps to lengths 2 and 3 */
+ return TRUE;
+ else
+ return FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_JP = {
+ eucjp_mbc_enc_len,
+ "EUC-JP", /* name */
+ 3, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ eucjp_mbc_to_code,
+ eucjp_code_to_mbclen,
+ eucjp_code_to_mbc,
+ eucjp_mbc_to_normalize,
+ eucjp_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ eucjp_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ eucjp_left_adjust_char_head,
+ eucjp_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c
new file mode 100644
index 0000000..57bf801
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_kr.c
@@ -0,0 +1,173 @@
+/**********************************************************************
+ euc_kr.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_EUCKR[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+euckr_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCKR[*p]; /* byte length of the character, keyed by its first byte */
+}
+
+static OnigCodePoint
+euckr_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); /* forwards to the encoding-parameterized helper, passing EUC-KR */
+}
+
+static int
+euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); /* forwards to the encoding-parameterized helper, passing EUC-KR */
+}
+
+static int
+euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag, /* forwards all arguments unchanged, adding the EUC-KR encoding */
+ pp, end, lower);
+}
+
+static int
+euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); /* forwards all arguments unchanged, adding the EUC-KR encoding */
+}
+
+static int
+euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); /* forwards to the encoding-parameterized helper, passing EUC-KR */
+}
+
+#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) /* for a byte value: true when c lies outside 0xa1..0xfe */
+
+static UChar*
+euckr_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* Assumed in this encoding,
+ mb-trail bytes don't mix with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* at or before the buffer start: nothing to adjust */
+ p = s;
+
+ while (!euckr_islead(*p) && p > start) p--; /* back up over bytes in 0xa1..0xfe until a byte outside that range, or start */
+ len = enc_len(ONIG_ENCODING_EUC_KR, p);
+ if (p + len > s) return (UChar* )p; /* the character starting at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by an even number of bytes, never passing s */
+}
+
+static int
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end) /* end is not used */
+{
+ const UChar c = *s;
+ if (c <= 0x7e) return TRUE; /* only bytes up to 0x7e permit a reverse match */
+ else return FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_KR = {
+ euckr_mbc_enc_len,
+ "EUC-KR", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euckr_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbc,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euckr_left_adjust_char_head,
+ euckr_is_allowed_reverse_match
+};
+
+/* Same as OnigEncodingEUC_KR except for the name */
+OnigEncodingType OnigEncodingEUC_CN = {
+ euckr_mbc_enc_len,
+ "EUC-CN", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euckr_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbc,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euckr_left_adjust_char_head,
+ euckr_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c
new file mode 100644
index 0000000..6f396e7
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_tw.c
@@ -0,0 +1,144 @@
+/**********************************************************************
+ euc_tw.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_EUCTW[] = { /* character byte length, indexed by the first byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, /* 0x8e is the only byte that starts a four-byte character */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0 is a single byte; 0xa1 onwards start two-byte characters */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xff is a single byte */
+};
+
+static int
+euctw_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCTW[*p]; /* the length is decided by the first byte alone */
+}
+
+static OnigCodePoint
+euctw_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); /* delegates to the shared multi-byte helper, bound to EUC-TW */
+}
+
+static int
+euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); /* delegates to the shared mb4 helper, bound to EUC-TW */
+}
+
+static int
+euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
+ pp, end, lower); /* delegates to the shared multi-byte helper, bound to EUC-TW */
+}
+
+static int
+euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end); /* delegates to the shared multi-byte helper, bound to EUC-TW */
+}
+
+static int
+euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); /* delegates to the shared mb4 helper, bound to EUC-TW */
+}
+
+#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff) /* true for bytes below 0xa1 other than 0x8e, and for 0xff */
+
+static UChar*
+euctw_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* Assumes that, in this encoding, multi-byte trail bytes are never
+ mixed with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* nothing precedes s, so there is nothing to adjust */
+ p = s;
+
+ while (!euctw_islead(*p) && p > start) p--; /* back up to a byte accepted by euctw_islead, or to start */
+ len = enc_len(ONIG_ENCODING_EUC_TW, p);
+ if (p + len > s) return (UChar* )p; /* the character beginning at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* round the remaining distance down to an even byte count */
+}
+
+static int
+euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  /* Reverse matching is allowed only when *s is at most 0x7e; `end` is not consulted. */
+  if (*s > 0x7e) return FALSE;
+  return TRUE;
+}
+
+OnigEncodingType OnigEncodingEUC_TW = { /* encoding descriptor built from the euctw_* handlers and shared onigenc_* helpers */
+ euctw_mbc_enc_len,
+ "EUC-TW", /* name */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euctw_mbc_to_code,
+ onigenc_mb4_code_to_mbclen,
+ euctw_code_to_mbc,
+ euctw_mbc_to_normalize,
+ euctw_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euctw_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euctw_left_adjust_char_head,
+ euctw_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c
new file mode 100644
index 0000000..01995ea
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/gb18030.c
@@ -0,0 +1,501 @@
+/**********************************************************************
+ gb18030.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2005 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#if 1 /* tracing disabled: DEBUG_GB18030 expands to nothing; change to 0 to route it to printf */
+#define DEBUG_GB18030(arg)
+#else
+#define DEBUG_GB18030(arg) printf arg
+#endif
+
+enum { /* byte classes stored in GB18030_MAP */
+ C1, /* one-byte char */
+ C2, /* one-byte or second of two-byte char */
+ C4, /* one-byte or second or fourth of four-byte char */
+ CM /* first of two- or four-byte char or second of two-byte char */
+};
+
+static const char GB18030_MAP[] = { /* byte class (C1/C2/C4/CM), indexed by byte value */
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, /* 0x30-0x39 are C4 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x40-0x7e are C2 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, /* 0x7f is C1 */
+ C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0x80 is C2; 0x81-0xfe are CM */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 /* 0xff is C1 */
+};
+
+static int
+gb18030_mbc_enc_len(const UChar* p)
+{
+  /* Only a CM byte can open a multi-byte character; anything else is one byte. */
+  if (GB18030_MAP[p[0]] != CM) return 1;
+  /* The class of the second byte selects the four-byte or two-byte form. */
+  switch (GB18030_MAP[p[1]]) {
+  case C4: return 4;
+  case C1: return 1; /* illegal sequence */
+  default: return 2;
+  }
+}
+
+static OnigCodePoint
+gb18030_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end); /* delegates to the shared multi-byte helper, bound to GB18030 */
+}
+
+static int
+gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf); /* delegates to the shared mb4 helper, bound to GB18030 */
+}
+
+static int
+gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag,
+ pp, end, lower); /* delegates to the shared multi-byte helper, bound to GB18030 */
+}
+
+static int
+gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); /* delegates to the shared multi-byte helper, bound to GB18030 */
+}
+
+static int
+gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype); /* delegates to the shared mb4 helper, bound to GB18030 */
+}
+
+enum state { /* states of the backward scan in gb18030_left_adjust_char_head; names describe the byte classes seen so far */
+ S_START,
+ S_one_C2,
+ S_one_C4,
+ S_one_CM,
+
+ S_odd_CM_one_CX,
+ S_even_CM_one_CX,
+
+ /* CMC4 : pair of "CM C4" */
+ S_one_CMC4,
+ S_odd_CMC4,
+ S_one_C4_odd_CMC4,
+ S_even_CMC4,
+ S_one_C4_even_CMC4,
+
+ S_odd_CM_odd_CMC4,
+ S_even_CM_odd_CMC4,
+
+ S_odd_CM_even_CMC4,
+ S_even_CM_even_CMC4,
+
+ /* C4CM : pair of "C4 CM" */
+ S_odd_C4CM,
+ S_one_CM_odd_C4CM,
+ S_even_C4CM,
+ S_one_CM_even_C4CM,
+
+ S_even_CM_odd_C4CM,
+ S_odd_CM_odd_C4CM,
+ S_even_CM_even_C4CM,
+ S_odd_CM_even_C4CM,
+};
+
+static UChar*
+gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
+{ /* Walks backwards from s, classifying bytes via GB18030_MAP, and returns s moved back by 0 to 3 bytes. */
+ const UChar *p;
+ enum state state = S_START;
+
+ DEBUG_GB18030(("----------------\n"));
+ for (p = s; p >= start; p--) { /* NOTE(review): the final p-- forms start - 1, which ISO C leaves undefined when start is the array base - confirm */
+ DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
+ switch (state) {
+ case S_START:
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ return (UChar *)s;
+ case C2:
+ state = S_one_C2; /* C2 */
+ break;
+ case C4:
+ state = S_one_C4; /* C4 */
+ break;
+ case CM:
+ state = S_one_CM; /* CM */
+ break;
+ }
+ break;
+ case S_one_C2: /* C2 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM C2 */
+ break;
+ }
+ break;
+ case S_one_C4: /* C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CMC4;
+ break;
+ }
+ break;
+ case S_one_CM: /* CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)s;
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM CM */
+ break;
+ }
+ break;
+
+ case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_one_CX;
+ break;
+ }
+ break;
+ case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX;
+ break;
+ }
+ break;
+
+ case S_one_CMC4: /* CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4; /* C4 CM C4 */
+ break;
+ case CM:
+ state = S_even_CM_one_CX; /* CM CM C4 */
+ break;
+ }
+ break;
+ case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_odd_CMC4: /* C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CMC4; /* CM C4 CM C4 */
+ break;
+ }
+ break;
+ case S_even_CMC4: /* CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 3);
+ case C4:
+ state = S_one_C4_even_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_even_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CM_odd_C4CM; /* CM C4 CM */
+ break;
+ }
+ break;
+ case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 2); /* |CM C4 CM */
+ case C4:
+ state = S_even_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_even_C4CM: /* C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* C4|CM C4 CM */
+ case CM:
+ state = S_one_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_odd_C4CM: /* CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|C4|CM */
+ case CM:
+ state = S_odd_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
+ case CM:
+ state = S_odd_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+ }
+ }
+
+ DEBUG_GB18030(("state %d\n", state));
+ switch (state) { /* the scan ran past start without returning: resolve from the state reached */
+ case S_START: return (UChar *)(s - 0);
+ case S_one_C2: return (UChar *)(s - 0);
+ case S_one_C4: return (UChar *)(s - 0);
+ case S_one_CM: return (UChar *)(s - 0);
+
+ case S_odd_CM_one_CX: return (UChar *)(s - 1);
+ case S_even_CM_one_CX: return (UChar *)(s - 0);
+
+ case S_one_CMC4: return (UChar *)(s - 1);
+ case S_odd_CMC4: return (UChar *)(s - 1);
+ case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
+ case S_even_CMC4: return (UChar *)(s - 3);
+ case S_one_C4_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
+ case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
+
+ case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
+ case S_even_CM_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_C4CM: return (UChar *)(s - 0);
+ case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_C4CM: return (UChar *)(s - 2);
+ case S_one_CM_even_C4CM: return (UChar *)(s - 0);
+
+ case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
+ case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_CM_even_C4CM: return (UChar *)(s - 2);
+ case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
+ }
+
+ return (UChar* )s; /* never come here. (escape warning) */
+}
+
+static int
+gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ return GB18030_MAP[*s] == C1 ? TRUE : FALSE; /* allowed only when *s is classed C1; `end` is not consulted */
+}
+
+OnigEncodingType OnigEncodingGB18030 = { /* encoding descriptor built from the gb18030_* handlers and shared onigenc_* helpers */
+ gb18030_mbc_enc_len,
+ "GB18030", /* name */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ gb18030_mbc_to_code,
+ onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbc,
+ gb18030_mbc_to_normalize,
+ gb18030_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ gb18030_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ gb18030_left_adjust_char_head,
+ gb18030_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c
new file mode 100644
index 0000000..5646f26
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_1.c
@@ -0,0 +1,151 @@
+/**********************************************************************
+ iso8859_1.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
+  ((EncISO_8859_1_CtypeTable[code] & (ctype)) != 0) /* nonzero when byte `code` has any bit of `ctype`; `ctype` is parenthesized so an argument such as a|b expands correctly */
+
+static const unsigned short EncISO_8859_1_CtypeTable[256] = { /* per-byte ctype bit masks, indexed by byte value */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+static int
+iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Writes one byte to *lower, lower-cased when `flag` enables folding for its range. */
+  const UChar* cur = *pp;
+  OnigAmbigType need; /* the fold flag that governs this byte */
+
+  if (ONIGENC_IS_MBC_ASCII(cur))
+    need = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    need = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & need) != 0) ? ONIGENC_ISO_8859_1_TO_LOWER_CASE(*cur) : *cur;
+
+  *pp = cur + 1; /* consume exactly one input byte */
+  return 1; /* return byte length of converted char to lower */
+}
+
+static int
+iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+  /* Consumes one byte from *pp; TRUE when it is a case-convertible letter under `flag`. */
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_1_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+    /* Test the LOWER bit with '&'; the former '|' made this branch unconditional. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
+        return FALSE;
+      else
+        return TRUE;
+    }
+    /* Not lower case: ambiguous only when the UPPER bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE; /* folding is not enabled for this byte's range */
+}
+
+static int
+iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Code points outside the 256-entry table belong to no ctype. */
+  if (code >= 256)
+    return FALSE;
+  return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
+}
+
+OnigEncodingType OnigEncodingISO_8859_1 = { /* encoding descriptor built from the iso_8859_1_* handlers and shared single-byte helpers */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-1", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_1_mbc_to_normalize,
+ iso_8859_1_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_1_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c
new file mode 100644
index 0000000..8081ef8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_10.c
@@ -0,0 +1,300 @@
+/**********************************************************************
+ iso8859_10.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] /* lower-case mapping of byte c */
+#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
+  ((EncISO_8859_10_CtypeTable[code] & (ctype)) != 0) /* nonzero when byte `code` has any bit of `ctype`; `ctype` is parenthesized so an argument such as a|b expands correctly */
+
+static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { /* byte -> lower-case byte; entries without a mapping hold their own value */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247',
+ '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_10_CtypeTable[256] = { /* per-byte ctype bit masks, indexed by byte value */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+static int
+iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
+                             const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Writes one byte to *lower, lower-cased when `flag` enables folding for its range. */
+  const UChar* cur = *pp;
+  OnigAmbigType need; /* the fold flag that governs this byte */
+
+  if (ONIGENC_IS_MBC_ASCII(cur))
+    need = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    need = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & need) != 0) ? ENC_ISO_8859_10_TO_LOWER_CASE(*cur) : *cur;
+
+  *pp = cur + 1; /* consume exactly one input byte */
+  return 1; /* return byte length of converted char to lower */
+}
+
+static int
+iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
+                             const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+  /* Consumes one byte from *pp; TRUE when it is a case-convertible letter under `flag`. */
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_10_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+    /* Test the LOWER bit with '&'; the former '|' made this branch unconditional. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+    /* Not lower case: ambiguous only when the UPPER bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE; /* folding is not enabled for this byte's range */
+}
+
+static int
+iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Code points outside the 256-entry table belong to no ctype. */
+  if (code >= 256)
+    return FALSE;
+  return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
+}
+
+static int
+iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
+ const OnigPairAmbigCodes** ccs)
+{ /* Points *ccs at the pair table selected by `flag` and returns its entry count (0 when nothing is selected). */
+ static const OnigPairAmbigCodes cc[] = { /* non-ASCII case pairs, each listed in both directions */
+ { 0xa1, 0xb1 },
+ { 0xa2, 0xb2 },
+ { 0xa3, 0xb3 },
+ { 0xa4, 0xb4 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa8, 0xb8 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xb1, 0xa1 },
+ { 0xb2, 0xa2 },
+ { 0xb3, 0xa3 },
+ { 0xb4, 0xa4 },
+ { 0xb5, 0xa5 },
+ { 0xb6, 0xa6 },
+ { 0xb8, 0xa8 },
+ { 0xb9, 0xa9 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xab },
+ { 0xbc, 0xac },
+ { 0xbe, 0xae },
+ { 0xbf, 0xaf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52; /* presumably the entry count of OnigAsciiPairAmbigCodes (26 letters x 2 directions) - confirm */
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0; /* flags are compared with ==, so any other value (including both bits together) yields no pairs and leaves *ccs untouched */
+}
+
+OnigEncodingType OnigEncodingISO_8859_10 = { /* encoding descriptor built from the iso_8859_10_* handlers and shared single-byte helpers */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-10", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_10_mbc_to_normalize,
+ iso_8859_10_is_mbc_ambiguous,
+ iso_8859_10_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_10_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c
new file mode 100644
index 0000000..de9bb3b
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_11.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_11.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Non-zero when any bit of the mask `ctype` is set in the ctype table
+ * entry for byte value `code`.  `code` must be in 0..255.
+ * `ctype` is parenthesized so a compound mask such as (A | B) is
+ * AND-ed as a whole instead of being split by operator precedence.
+ */
+#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
+  ((EncISO_8859_11_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Character-type bit masks for ISO-8859-11, indexed by byte value
+ * (0x00-0xff, eight entries per row).  Entries are tested against a
+ * ctype mask through ENC_IS_ISO_8859_11_CTYPE.
+ * Bytes 0xdb-0xde and 0xfc-0xff carry 0x0000, i.e. no ctype bit set.
+ */
+static const unsigned short EncISO_8859_11_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Report whether code point `code` has any of the character-type bits
+ * in `ctype`.  Code points of 256 and above have no entry in the
+ * ISO-8859-11 ctype table and always yield FALSE.
+ */
+static int
+iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-11.
+ * Fixed one-byte encoding (max and min enc length are both 1) that
+ * advertises ASCII case ambiguity only; normalization, ambiguity
+ * testing and pair lookup are delegated to the onigenc_ascii_*
+ * helpers, while ctype queries use iso_8859_11_is_code_ctype.
+ */
+OnigEncodingType OnigEncodingISO_8859_11 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-11", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_11_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c
new file mode 100644
index 0000000..69316ed
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_13.c
@@ -0,0 +1,268 @@
+/**********************************************************************
+ iso8859_13.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte `c` (0..255) in ISO-8859-13. */
+#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
+/*
+ * Non-zero when any bit of the mask `ctype` is set in the ctype table
+ * entry for byte value `code` (0..255).  `ctype` is parenthesized so a
+ * compound mask such as (A | B) is AND-ed as a whole.
+ */
+#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
+  ((EncISO_8859_13_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Lower-case mapping for ISO-8859-13, indexed by byte value (octal
+ * literals, eight entries per row).  A byte without a lower-case
+ * counterpart maps to itself.  Besides 'A'-'Z', the folded bytes are
+ * 0xa8, 0xaa, 0xaf, 0xc0-0xd6 and 0xd8-0xde; 0xd7 and 0xdf stay as is.
+ */
+static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-type bit masks for ISO-8859-13, indexed by byte value
+ * (0x00-0xff, eight entries per row).  Entries are tested against a
+ * ctype mask through ENC_IS_ISO_8859_13_CTYPE and masked with
+ * ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER in is_mbc_ambiguous.
+ */
+static const unsigned short EncISO_8859_13_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0
+};
+
+/*
+ * Normalize one ISO-8859-13 byte for case-insensitive matching.
+ *
+ * The byte at *pp is written to *lower, folded through the lower-case
+ * table when `flag` enables folding for its class (ASCII or non-ASCII)
+ * and copied unchanged otherwise.  *pp is advanced by one byte.
+ * `end` is not consulted.  Returns 1, the byte length written to lower.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's class. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Tell whether the ISO-8859-13 byte at *pp has a case counterpart.
+ *
+ * *pp is always advanced by one byte.  The byte is examined only when
+ * `flag` enables ambiguity for its class (ASCII or non-ASCII);
+ * otherwise FALSE is returned.  `end` is not consulted.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters that
+ * can be converted, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_13_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; OR-ing the mask in would make the
+       condition true for every byte, including non-letters. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf, 0xb5 are lower case letter, but can't convert. */
+      if (*p == 0xdf || *p == 0xb5)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether code point `code` has any of the character-type bits
+ * in `ctype`.  Code points of 256 and above have no entry in the
+ * ISO-8859-13 ctype table and always yield FALSE.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the list of case-ambiguous code pairs for ISO-8859-13.
+ *
+ * `flag` must be exactly one of the two ambiguity constants:
+ *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = shared ASCII list
+ *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = local list `cc`
+ * Any other value (including both constants OR-ed together) returns 0
+ * and leaves *ccs untouched.  Returns the number of pairs in *ccs.
+ *
+ * The local list holds every non-ASCII pair in both directions, ordered
+ * by the first code.  It includes 0xa8/0xb8, 0xaa/0xba and 0xaf/0xbf so
+ * that it agrees with the lower-case table of this encoding.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa8, 0xb8 },
+    { 0xaa, 0xba },
+    { 0xaf, 0xbf },
+
+    { 0xb8, 0xa8 },
+    { 0xba, 0xaa },
+    { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    /* Entry count of OnigAsciiPairAmbigCodes; presumably 26 letters in
+       both directions -- confirm against its definition. */
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-13.
+ * Fixed one-byte encoding (max and min enc length are both 1) that
+ * advertises both ASCII and non-ASCII case ambiguity.  Generic
+ * onigenc_single_byte_* helpers are combined with this file's static
+ * callbacks for normalization, ambiguity testing, pair lookup and
+ * ctype queries.
+ */
+OnigEncodingType OnigEncodingISO_8859_13 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-13", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c
new file mode 100644
index 0000000..44638cf
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_14.c
@@ -0,0 +1,298 @@
+/**********************************************************************
+ iso8859_14.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte `c` (0..255) in ISO-8859-14. */
+#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
+/*
+ * Non-zero when any bit of the mask `ctype` is set in the ctype table
+ * entry for byte value `code` (0..255).  `ctype` is parenthesized so a
+ * compound mask such as (A | B) is AND-ed as a whole.
+ */
+#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
+  ((EncISO_8859_14_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Lower-case mapping for ISO-8859-14, indexed by byte value (octal
+ * literals, eight entries per row).  A byte without a lower-case
+ * counterpart maps to itself.  In the 0xa0-0xbf range the upper/lower
+ * partners are not at a fixed offset (e.g. 0xa1 -> 0xa2, 0xa6 -> 0xab,
+ * 0xaf -> 0xff); 0xc0-0xde fold to 0xe0-0xfe and 0xdf stays as is.
+ */
+static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247',
+ '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377',
+ '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271',
+ '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-type bit masks for ISO-8859-14, indexed by byte value
+ * (0x00-0xff, eight entries per row).  Entries are tested against a
+ * ctype mask through ENC_IS_ISO_8859_14_CTYPE and masked with
+ * ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER in is_mbc_ambiguous.
+ */
+static const unsigned short EncISO_8859_14_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2,
+ 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Normalize one ISO-8859-14 byte for case-insensitive matching.
+ *
+ * The byte at *pp is written to *lower, folded through the lower-case
+ * table when `flag` enables folding for its class (ASCII or non-ASCII)
+ * and copied unchanged otherwise.  *pp is advanced by one byte.
+ * `end` is not consulted.  Returns 1, the byte length written to lower.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's class. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Tell whether the ISO-8859-14 byte at *pp has a case counterpart.
+ *
+ * *pp is always advanced by one byte.  The byte is examined only when
+ * `flag` enables ambiguity for its class (ASCII or non-ASCII);
+ * otherwise FALSE is returned.  `end` is not consulted.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters that
+ * can be converted, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_14_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; OR-ing the mask in would make the
+       condition true for every byte, including non-letters. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether code point `code` has any of the character-type bits
+ * in `ctype`.  Code points of 256 and above have no entry in the
+ * ISO-8859-14 ctype table and always yield FALSE.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the list of case-ambiguous code pairs for ISO-8859-14.
+ *
+ * `flag` must be exactly one of the two ambiguity constants:
+ *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = shared ASCII list
+ *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = local list below
+ * Any other value (including both constants OR-ed together) returns 0
+ * and leaves *ccs untouched.  Returns the number of pairs in *ccs.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  /* Every non-ASCII pair, listed in both directions. */
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa1, 0xa2 },
+    { 0xa2, 0xa1 },
+    { 0xa4, 0xa5 },
+    { 0xa5, 0xa4 },
+    { 0xa6, 0xab },
+    { 0xa8, 0xb8 },
+    { 0xaa, 0xba },
+    { 0xab, 0xa6 },
+    { 0xac, 0xbc },
+    { 0xaf, 0xff },
+
+    { 0xb0, 0xb1 },
+    { 0xb1, 0xb0 },
+    { 0xb2, 0xb3 },
+    { 0xb3, 0xb2 },
+    { 0xb4, 0xb5 },
+    { 0xb5, 0xb4 },
+    { 0xb7, 0xb9 },
+    { 0xb8, 0xa8 },
+    { 0xb9, 0xb7 },
+    { 0xba, 0xaa },
+    { 0xbb, 0xbf },
+    { 0xbc, 0xac },
+    { 0xbd, 0xbe },
+    { 0xbe, 0xbd },
+    { 0xbf, 0xbb },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde },
+    { 0xff, 0xaf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+
+  /* Anything other than the pure non-ASCII request has no list. */
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = cc;
+  return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-14.
+ * Fixed one-byte encoding (max and min enc length are both 1) that
+ * advertises both ASCII and non-ASCII case ambiguity.  Generic
+ * onigenc_single_byte_* helpers are combined with this file's static
+ * callbacks for normalization, ambiguity testing, pair lookup and
+ * ctype queries.
+ */
+OnigEncodingType OnigEncodingISO_8859_14 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-14", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c
new file mode 100644
index 0000000..f643b89
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_15.c
@@ -0,0 +1,279 @@
+/**********************************************************************
+ iso8859_15.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte c, looked up in the 256-entry folding table. */
+#define ENC_ISO_8859_15_TO_LOWER_CASE(c) \
+  (EncISO_8859_15_ToLowerCaseTable[(c)])
+/* Non-zero when the class mask stored for byte `code` shares a bit with
+   `ctype`.  Arguments are parenthesized so that a compound argument such
+   as (A | B) is masked as a whole. */
+#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
+  ((EncISO_8859_15_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Case-folding table for ISO-8859-15, indexed by byte value (octal literals).
+ * Each entry is the lower-case counterpart of its index; bytes that have no
+ * lower-case form map to themselves.  Read through
+ * ENC_ISO_8859_15_TO_LOWER_CASE.
+ */
+static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bit masks for ISO-8859-15, indexed by byte value.
+ * Tested through ENC_IS_ISO_8859_15_CTYPE and masked with
+ * ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER in is_mbc_ambiguous.  The meaning
+ * of each bit comes from the ONIGENC_CTYPE_* constants, which are declared
+ * outside this file.
+ */
+static const unsigned short EncISO_8859_15_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Normalize one ISO-8859-15 character for case-insensitive comparison.
+ *
+ * flag:  bit set of ONIGENC_AMBIGUOUS_MATCH_* case classes to fold.
+ * pp:    in/out cursor; the byte at *pp is consumed and *pp moves on by one.
+ * end:   unused.
+ * lower: receives the folded byte when the class matching the byte's range
+ *        (ASCII or non-ASCII) is enabled in flag, otherwise the byte itself.
+ *
+ * Returns the number of bytes written to lower, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  /* Only the case class covering this byte's range decides the folding. */
+  OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+                           ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                           : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_ISO_8859_15_TO_LOWER_CASE(*src)
+                                  : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Tell whether the ISO-8859-15 character at *pp has a case counterpart under
+ * the case classes enabled in flag.  *pp is always advanced by one byte.
+ *
+ * flag: bit set of ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE /
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.
+ * end:  unused.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters other than
+ * 0xdf, 0xaa, 0xb5 and 0xba; FALSE for everything else, and for any byte whose
+ * range (ASCII / non-ASCII) is not enabled in flag.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_15_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Must be a bit test: OR-ing the constant in is non-zero for every
+       byte, which would classify digits and punctuation as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf etc.. are lower case letter, but can't convert. */
+      if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
+        return FALSE;
+      return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Character-class test for an ISO-8859-15 code point.
+ * Returns non-zero when code is below 256 and its table entry shares a bit
+ * with ctype; code points of 256 and above never match.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the list of { from, to } case pairs for one case class.
+ *
+ * flag: exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value (including the
+ *       two OR-ed together) selects nothing.
+ * ccs:  receives a pointer to the selected table; left untouched when the
+ *       function returns 0.
+ *
+ * Returns the number of entries in *ccs.  For the ASCII class the shared
+ * OnigAsciiPairAmbigCodes table is used with a fixed count of 52.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag,
+                         const OnigPairAmbigCodes** ccs)
+{
+  /* Non-ASCII pairs, listed in both directions, ordered by first code. */
+  static const OnigPairAmbigCodes nonascii_pairs[] = {
+    { 0xa6, 0xa8 }, { 0xa8, 0xa6 },
+
+    { 0xb4, 0xb8 }, { 0xb8, 0xb4 }, { 0xbc, 0xbd }, { 0xbd, 0xbc },
+    { 0xbe, 0xff },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde },
+    { 0xff, 0xbe }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = nonascii_pairs;
+    return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-15.
+ *
+ * Describes a single-byte encoding (max and min encoded length are both 1)
+ * whose ambiguity flag word enables both ASCII and non-ASCII case matching.
+ * The charset-specific callbacks are the static functions defined above; the
+ * remaining slots use shared onigenc_* helpers.  The initializer is
+ * positional: entries must stay in the member order of OnigEncodingType,
+ * which is declared outside this file (presumably reached through regenc.h
+ * -- confirm before reordering anything).
+ */
+OnigEncodingType OnigEncodingISO_8859_15 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-15", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c
new file mode 100644
index 0000000..921ae36
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_16.c
@@ -0,0 +1,292 @@
+/**********************************************************************
+ iso8859_16.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte c, looked up in the 256-entry folding table. */
+#define ENC_ISO_8859_16_TO_LOWER_CASE(c) \
+  (EncISO_8859_16_ToLowerCaseTable[(c)])
+/* Non-zero when the class mask stored for byte `code` shares a bit with
+   `ctype`.  Arguments are parenthesized so that a compound argument such
+   as (A | B) is masked as a whole. */
+#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
+  ((EncISO_8859_16_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Case-folding table for ISO-8859-16, indexed by byte value (octal literals).
+ * Each entry is the lower-case counterpart of its index; bytes that have no
+ * lower-case form map to themselves.  Read through
+ * ENC_ISO_8859_16_TO_LOWER_CASE.
+ *
+ * Entry 0244 (0xa4) maps to itself: that byte carries no upper/lower class
+ * in EncISO_8859_16_CtypeTable and has no entry in the pair list returned by
+ * get_all_pair_ambig_codes, so it must not fold onto 0245.
+ */
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+  '\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247',
+  '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277',
+  '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267',
+  '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bit masks for ISO-8859-16, indexed by byte value.
+ * Tested through ENC_IS_ISO_8859_16_CTYPE and masked with
+ * ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER in is_mbc_ambiguous.  The meaning
+ * of each bit comes from the ONIGENC_CTYPE_* constants, which are declared
+ * outside this file.
+ */
+static const unsigned short EncISO_8859_16_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2,
+ 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Normalize one ISO-8859-16 character for case-insensitive comparison.
+ *
+ * flag:  bit set of ONIGENC_AMBIGUOUS_MATCH_* case classes to fold.
+ * pp:    in/out cursor; the byte at *pp is consumed and *pp moves on by one.
+ * end:   unused.
+ * lower: receives the folded byte when the class matching the byte's range
+ *        (ASCII or non-ASCII) is enabled in flag, otherwise the byte itself.
+ *
+ * Returns the number of bytes written to lower, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  /* Only the case class covering this byte's range decides the folding. */
+  OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+                           ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                           : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_ISO_8859_16_TO_LOWER_CASE(*src)
+                                  : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Tell whether the ISO-8859-16 character at *pp has a case counterpart under
+ * the case classes enabled in flag.  *pp is always advanced by one byte.
+ *
+ * flag: bit set of ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE /
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.
+ * end:  unused.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters other than
+ * 0xdf; FALSE for everything else, and for any byte whose range
+ * (ASCII / non-ASCII) is not enabled in flag.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_16_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Must be a bit test: OR-ing the constant in is non-zero for every
+       byte, which would classify digits and punctuation as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Character-class test for an ISO-8859-16 code point.
+ * Returns non-zero when code is below 256 and its table entry shares a bit
+ * with ctype; code points of 256 and above never match.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the list of { from, to } case pairs for one case class.
+ *
+ * flag: exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value (including the
+ *       two OR-ed together) selects nothing.
+ * ccs:  receives a pointer to the selected table; left untouched when the
+ *       function returns 0.
+ *
+ * Returns the number of entries in *ccs.  For the ASCII class the shared
+ * OnigAsciiPairAmbigCodes table is used with a fixed count of 52.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  /* Non-ASCII pairs, listed in both directions, ordered by first code. */
+  static const OnigPairAmbigCodes nonascii_pairs[] = {
+    { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa3, 0xb3 }, { 0xa6, 0xa8 },
+    { 0xa8, 0xa6 }, { 0xaa, 0xba }, { 0xac, 0xae }, { 0xae, 0xac },
+    { 0xaf, 0xbf },
+
+    { 0xb2, 0xb9 }, { 0xb3, 0xa3 }, { 0xb4, 0xb8 }, { 0xb8, 0xb4 },
+    { 0xb9, 0xb2 }, { 0xba, 0xaa }, { 0xbc, 0xbd }, { 0xbd, 0xbc },
+    { 0xbe, 0xff }, { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde },
+    { 0xff, 0xbe }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = nonascii_pairs;
+    return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-16.
+ *
+ * Describes a single-byte encoding (max and min encoded length are both 1)
+ * whose ambiguity flag word enables both ASCII and non-ASCII case matching.
+ * The charset-specific callbacks are the static functions defined above; the
+ * remaining slots use shared onigenc_* helpers.  The initializer is
+ * positional: entries must stay in the member order of OnigEncodingType,
+ * which is declared outside this file (presumably reached through regenc.h
+ * -- confirm before reordering anything).
+ */
+OnigEncodingType OnigEncodingISO_8859_16 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-16", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c
new file mode 100644
index 0000000..f8cb375
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_2.c
@@ -0,0 +1,292 @@
+/**********************************************************************
+ iso8859_2.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte c, looked up in the 256-entry folding table. */
+#define ENC_ISO_8859_2_TO_LOWER_CASE(c) \
+  (EncISO_8859_2_ToLowerCaseTable[(c)])
+/* Non-zero when the class mask stored for byte `code` shares a bit with
+   `ctype`.  Arguments are parenthesized so that a compound argument such
+   as (A | B) is masked as a whole. */
+#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
+  ((EncISO_8859_2_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Case-folding table for ISO-8859-2, indexed by byte value (octal literals).
+ * Each entry is the lower-case counterpart of its index; bytes that have no
+ * lower-case form map to themselves.  Read through
+ * ENC_ISO_8859_2_TO_LOWER_CASE.
+ */
+static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bit masks for ISO-8859-2, indexed by byte value.
+ * Tested through ENC_IS_ISO_8859_2_CTYPE and masked with
+ * ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER in iso_8859_2_is_mbc_ambiguous.
+ * The meaning of each bit comes from the ONIGENC_CTYPE_* constants, which are
+ * declared outside this file.
+ */
+static const unsigned short EncISO_8859_2_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+/*
+ * Normalize one ISO-8859-2 character for case-insensitive comparison.
+ *
+ * flag:  bit set of ONIGENC_AMBIGUOUS_MATCH_* case classes to fold.
+ * pp:    in/out cursor; the byte at *pp is consumed and *pp moves on by one.
+ * end:   unused.
+ * lower: receives the folded byte when the class matching the byte's range
+ *        (ASCII or non-ASCII) is enabled in flag, otherwise the byte itself.
+ *
+ * Returns the number of bytes written to lower, which is always 1.
+ */
+static int
+iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  /* Only the case class covering this byte's range decides the folding. */
+  OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+                           ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                           : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_ISO_8859_2_TO_LOWER_CASE(*src)
+                                  : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Tell whether the ISO-8859-2 character at *pp has a case counterpart under
+ * the case classes enabled in flag.  *pp is always advanced by one byte.
+ *
+ * flag: bit set of ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE /
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.
+ * end:  unused.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters other than
+ * 0xdf; FALSE for everything else, and for any byte whose range
+ * (ASCII / non-ASCII) is not enabled in flag.
+ */
+static int
+iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_2_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Must be a bit test: OR-ing the constant in is non-zero for every
+       byte, which would classify digits and punctuation as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Hand out the list of { from, to } case pairs for one case class.
+ *
+ * flag: exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value (including the
+ *       two OR-ed together) selects nothing.
+ * ccs:  receives a pointer to the selected table; left untouched when the
+ *       function returns 0.
+ *
+ * Returns the number of entries in *ccs.  For the ASCII class the shared
+ * OnigAsciiPairAmbigCodes table is used with a fixed count of 52.
+ */
+static int
+iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  /* Non-ASCII pairs, listed in both directions, ordered by first code. */
+  static const OnigPairAmbigCodes nonascii_pairs[] = {
+    { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, { 0xa6, 0xb6 },
+    { 0xa9, 0xb9 }, { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc },
+    { 0xae, 0xbe }, { 0xaf, 0xbf },
+
+    { 0xb1, 0xa1 }, { 0xb3, 0xa3 }, { 0xb5, 0xa5 }, { 0xb6, 0xa6 },
+    { 0xb9, 0xa9 }, { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac },
+    { 0xbe, 0xae }, { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = nonascii_pairs;
+    return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+/*
+ * Character-class test for an ISO-8859-2 code point.
+ * Returns non-zero when code is below 256 and its table entry shares a bit
+ * with ctype; code points of 256 and above never match.
+ */
+static int
+iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-2.
+ *
+ * Describes a single-byte encoding (max and min encoded length are both 1)
+ * whose ambiguity flag word enables both ASCII and non-ASCII case matching.
+ * The charset-specific callbacks are the static iso_8859_2_* functions
+ * defined above; the remaining slots use shared onigenc_* helpers.  The
+ * initializer is positional: entries must stay in the member order of
+ * OnigEncodingType, which is declared outside this file (presumably reached
+ * through regenc.h -- confirm before reordering anything).
+ */
+OnigEncodingType OnigEncodingISO_8859_2 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-2", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_2_mbc_to_normalize,
+ iso_8859_2_is_mbc_ambiguous,
+ iso_8859_2_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_2_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c
new file mode 100644
index 0000000..e62d20d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_3.c
@@ -0,0 +1,281 @@
+/**********************************************************************
+ iso8859_3.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] /* byte -> lower-case byte */
+#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
+  ((EncISO_8859_3_CtypeTable[code] & (ctype)) != 0) /* (ctype) parenthesized so a mask like A | B binds correctly */
+
+static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { /* indexed by byte; gives its lower-case byte, or the byte itself when it has none */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_3_CtypeTable[256] = { /* per-byte ctype bit mask, tested by ENC_IS_ISO_8859_3_CTYPE; 0x0000 matches no ctype */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2,
+ 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+static int
+iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
+  const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Fold one byte to lower case when `flag` enables folding for its class
+     (ASCII or non-ASCII); otherwise copy it unchanged. Always consumes one
+     byte from *pp and returns 1, the number of bytes stored in `lower`. */
+  const UChar* src = *pp;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+    ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+    : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = *src;
+  if ((flag & wanted) != 0)
+    *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*src);
+  *pp = src + 1;
+  return 1;
+}
+
+static int
+iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
+  const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  /* Always consumes one byte. Returns TRUE only when `flag` enables folding
+     for the byte's class (ASCII / non-ASCII) and the byte has a case partner. */
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_3_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with `&`; `|` would be non-zero for every byte. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf and 0xb5 are marked lower case but have no upper-case form. */
+      return (*p == 0xdf || *p == 0xb5) ? FALSE : TRUE;
+    }
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+static int
+iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table has 256 rows; larger code points are never classified. */
+  if (code >= 256)
+    return FALSE;
+  return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
+}
+
+static int
+iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
+  const OnigPairAmbigCodes** ccs)
+{ /* Returns the number of case pairs for `flag` and points *ccs at them. */
+  static const OnigPairAmbigCodes cc[] = { /* each pair appears in both directions */
+    { 0xa1, 0xb1 },  /* upper -> lower */
+    { 0xa6, 0xb6 },
+    { 0xa9, 0xb9 },
+    { 0xaa, 0xba },
+    { 0xab, 0xbb },
+    { 0xac, 0xbc },
+    { 0xaf, 0xbf },
+    { 0xb1, 0xa1 },  /* lower -> upper */
+    { 0xb6, 0xa6 },
+    { 0xb9, 0xa9 },
+    { 0xba, 0xaa },
+    { 0xbb, 0xab },
+    { 0xbc, 0xac },
+    { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 },  /* upper -> lower */
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },  /* lower -> upper */
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  /* Exactly one class must be requested; any other flag value yields
+     no pairs and leaves *ccs untouched. */
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(cc[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+OnigEncodingType OnigEncodingISO_8859_3 = { /* encoding descriptor for ISO-8859-3 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-3", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* ASCII and non-ASCII case flags */
+ { /* meta characters: only the escape char is set, the rest are marked ineffective */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_3_mbc_to_normalize, /* charset-specific, defined in this file */
+ iso_8859_3_is_mbc_ambiguous, /* charset-specific, defined in this file */
+ iso_8859_3_get_all_pair_ambig_codes, /* charset-specific, defined in this file */
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_3_is_code_ctype, /* charset-specific, defined in this file */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c
new file mode 100644
index 0000000..dd6bd7d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_4.c
@@ -0,0 +1,290 @@
+/**********************************************************************
+ iso8859_4.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] /* byte -> lower-case byte */
+#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
+  ((EncISO_8859_4_CtypeTable[code] & (ctype)) != 0) /* (ctype) parenthesized so a mask like A | B binds correctly */
+
+static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { /* indexed by byte; gives its lower-case byte, or the byte itself when it has none */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_4_CtypeTable[256] = { /* per-byte ctype bit mask, tested by ENC_IS_ISO_8859_4_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+static int
+iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
+  const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Fold one byte to lower case when `flag` enables folding for its class
+     (ASCII or non-ASCII); otherwise copy it unchanged. Always consumes one
+     byte from *pp and returns 1, the number of bytes stored in `lower`. */
+  const UChar* src = *pp;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+    ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+    : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = *src;
+  if ((flag & wanted) != 0)
+    *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*src);
+  *pp = src + 1;
+  return 1;
+}
+
+static int
+iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
+  const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  /* Always consumes one byte. Returns TRUE only when `flag` enables folding
+     for the byte's class (ASCII / non-ASCII) and the byte has a case partner. */
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_4_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with `&`; `|` would be non-zero for every byte. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf and 0xa2 are marked lower case but have no upper-case form. */
+      return (*p == 0xdf || *p == 0xa2) ? FALSE : TRUE;
+    }
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+static int
+iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table has 256 rows; larger code points are never classified. */
+  if (code >= 256)
+    return FALSE;
+  return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
+}
+
+static int
+iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
+  const OnigPairAmbigCodes** ccs)
+{ /* Returns the number of case pairs for `flag` and points *ccs at them. */
+  static const OnigPairAmbigCodes cc[] = { /* each pair appears in both directions */
+    { 0xa1, 0xb1 },
+    { 0xa3, 0xb3 },
+    { 0xa5, 0xb5 },
+    { 0xa6, 0xb6 },
+    { 0xa9, 0xb9 },
+    { 0xaa, 0xba },
+    { 0xab, 0xbb },
+    { 0xac, 0xbc },
+    { 0xae, 0xbe },
+
+    { 0xb1, 0xa1 },
+    { 0xb3, 0xa3 },
+    { 0xb5, 0xa5 },
+    { 0xb6, 0xa6 },
+    { 0xb9, 0xa9 },
+    { 0xba, 0xaa },
+    { 0xbb, 0xab },
+    { 0xbc, 0xac },
+    { 0xbd, 0xbf },  /* upper 0xbd folds to 0xbf in the lower-case table */
+    { 0xbe, 0xae },
+    { 0xbf, 0xbd },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(cc[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;  /* any other flag value: no pairs, *ccs left untouched */
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+OnigEncodingType OnigEncodingISO_8859_4 = { /* encoding descriptor for ISO-8859-4 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-4", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* ASCII and non-ASCII case flags */
+ { /* meta characters: only the escape char is set, the rest are marked ineffective */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_4_mbc_to_normalize, /* charset-specific, defined in this file */
+ iso_8859_4_is_mbc_ambiguous, /* charset-specific, defined in this file */
+ iso_8859_4_get_all_pair_ambig_codes, /* charset-specific, defined in this file */
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_4_is_code_ctype, /* charset-specific, defined in this file */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c
new file mode 100644
index 0000000..87b7fb8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_5.c
@@ -0,0 +1,296 @@
+/**********************************************************************
+ iso8859_5.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] /* byte -> lower-case byte */
+#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
+  ((EncISO_8859_5_CtypeTable[code] & (ctype)) != 0) /* (ctype) parenthesized so a mask like A | B binds correctly */
+
+static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { /* indexed by byte; gives its lower-case byte, or the byte itself when it has none */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_5_CtypeTable[256] = { /* per-byte ctype bit mask, tested by ENC_IS_ISO_8859_5_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2
+};
+
+static int
+iso_8859_5_mbc_to_normalize(OnigAmbigType flag,
+  const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Fold one byte to lower case when `flag` enables folding for its class
+     (ASCII or non-ASCII); otherwise copy it unchanged. Always consumes one
+     byte from *pp and returns 1, the number of bytes stored in `lower`. */
+  const UChar* src = *pp;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+    ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+    : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = *src;
+  if ((flag & wanted) != 0)
+    *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*src);
+
+  *pp = src + 1;
+  return 1;
+}
+
+static int
+iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag,
+  const UChar** pp, const UChar* end)
+{
+  /* Consumes one byte from *pp. TRUE when `flag` enables folding for the
+     byte's class (ASCII / non-ASCII) and the ctype table marks the byte as
+     an upper- or lower-case letter; FALSE otherwise. */
+  const UChar* cur = (*pp)++;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(cur)
+    ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+    : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  if ((flag & wanted) == 0)
+    return FALSE;
+  return (EncISO_8859_5_CtypeTable[*cur] &
+          (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0 ? TRUE : FALSE;
+}
+
+static int
+iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table has 256 rows; larger code points are never classified. */
+  if (code >= 256)
+    return FALSE;
+  return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
+}
+
+static int
+iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
+  const OnigPairAmbigCodes** ccs)
+{ /* Returns the number of case pairs for `flag` and points *ccs at them. */
+  static const OnigPairAmbigCodes cc[] = { /* each pair appears in both directions */
+    { 0xa1, 0xf1 },  /* upper -> lower */
+    { 0xa2, 0xf2 },
+    { 0xa3, 0xf3 },
+    { 0xa4, 0xf4 },
+    { 0xa5, 0xf5 },
+    { 0xa6, 0xf6 },
+    { 0xa7, 0xf7 },
+    { 0xa8, 0xf8 },
+    { 0xa9, 0xf9 },
+    { 0xaa, 0xfa },
+    { 0xab, 0xfb },
+    { 0xac, 0xfc },
+    { 0xae, 0xfe },
+    { 0xaf, 0xff },
+
+    { 0xb0, 0xd0 },
+    { 0xb1, 0xd1 },
+    { 0xb2, 0xd2 },
+    { 0xb3, 0xd3 },
+    { 0xb4, 0xd4 },
+    { 0xb5, 0xd5 },
+    { 0xb6, 0xd6 },
+    { 0xb7, 0xd7 },
+    { 0xb8, 0xd8 },
+    { 0xb9, 0xd9 },
+    { 0xba, 0xda },
+    { 0xbb, 0xdb },
+    { 0xbc, 0xdc },
+    { 0xbd, 0xdd },
+    { 0xbe, 0xde },  /* must mirror { 0xde, 0xbe } below and the lower-case table */
+    { 0xbf, 0xdf },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xb0 },  /* lower -> upper */
+    { 0xd1, 0xb1 },
+    { 0xd2, 0xb2 },
+    { 0xd3, 0xb3 },
+    { 0xd4, 0xb4 },
+    { 0xd5, 0xb5 },
+    { 0xd6, 0xb6 },
+    { 0xd7, 0xb7 },
+    { 0xd8, 0xb8 },
+    { 0xd9, 0xb9 },
+    { 0xda, 0xba },
+    { 0xdb, 0xbb },
+    { 0xdc, 0xbc },
+    { 0xdd, 0xbd },
+    { 0xde, 0xbe },
+    { 0xdf, 0xbf },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf1, 0xa1 },
+    { 0xf2, 0xa2 },
+    { 0xf3, 0xa3 },
+    { 0xf4, 0xa4 },
+    { 0xf5, 0xa5 },
+    { 0xf6, 0xa6 },
+    { 0xf7, 0xa7 },
+    { 0xf8, 0xa8 },
+    { 0xf9, 0xa9 },
+    { 0xfa, 0xaa },
+    { 0xfb, 0xab },
+    { 0xfc, 0xac },
+    { 0xfe, 0xae },
+    { 0xff, 0xaf }
+  };
+
+  /* Exactly one class must be requested; any other flag value yields
+     no pairs and leaves *ccs untouched. */
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(cc[0]);
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+  *ccs = OnigAsciiPairAmbigCodes;
+  return 52;
+}
+
+OnigEncodingType OnigEncodingISO_8859_5 = { /* encoding descriptor for ISO-8859-5 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-5", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* ASCII and non-ASCII case flags */
+ { /* meta characters: only the escape char is set, the rest are marked ineffective */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_5_mbc_to_normalize, /* charset-specific, defined in this file */
+ iso_8859_5_is_mbc_ambiguous, /* charset-specific, defined in this file */
+ iso_8859_5_get_all_pair_ambig_codes, /* charset-specific, defined in this file */
+ onigenc_nothing_get_all_comp_ambig_codes, /* NOTE(review): name suggests no compound ambiguity codes for this charset - confirm */
+ iso_8859_5_is_code_ctype, /* charset-specific, defined in this file */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c
new file mode 100644
index 0000000..fffcd0e
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_6.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_6.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * True when the ctype bitmask stored for byte `code` shares at least one bit
+ * with `ctype`. Both arguments are parenthesized so that a compound mask such
+ * as (A | B) is tested as a whole instead of being split by the higher
+ * precedence of `&`.
+ */
+#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
+  ((EncISO_8859_6_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Per-byte character-class bitmasks for ISO-8859-6, indexed by byte value
+ * (256 entries) and tested through ENC_IS_ISO_8859_6_CTYPE.
+ * NOTE(review): the bit meanings are presumably the ONIGENC_CTYPE_* flags
+ * from the Oniguruma headers; confirm there before editing entries.
+ */
+static const unsigned short EncISO_8859_6_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Report whether `code` belongs to any of the character classes in `ctype`.
+ * Returns FALSE for every code point outside the single-byte range.
+ */
+static int
+iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table only has 256 entries; never index past it. */
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-6: a single-byte encoding (min and max
+ * length are both 1). Only ASCII case ambiguity is enabled, so the generic
+ * onigenc_ascii_* case routines are used and the ctype lookup is the only
+ * encoding-specific entry.
+ */
+OnigEncodingType OnigEncodingISO_8859_6 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-6", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_6_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c
new file mode 100644
index 0000000..e87661d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_7.c
@@ -0,0 +1,278 @@
+/**********************************************************************
+ iso8859_7.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte `c`, looked up in EncISO_8859_7_ToLowerCaseTable. */
+#define ENC_ISO_8859_7_TO_LOWER_CASE(c) (EncISO_8859_7_ToLowerCaseTable[(c)])
+/*
+ * True when the ctype bitmask stored for byte `code` shares at least one bit
+ * with `ctype`. Both arguments are parenthesized so that a compound mask such
+ * as (A | B) is tested as a whole instead of being split by the higher
+ * precedence of `&`.
+ */
+#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
+  ((EncISO_8859_7_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Lower-case mapping for ISO-8859-7, indexed by byte value (256 entries).
+ * Entries equal their index except for:
+ *   0x41-0x5a -> 0x61-0x7a
+ *   0xb6 -> 0xdc, 0xb8-0xba -> 0xdd-0xdf, 0xbc -> 0xfc, 0xbe-0xbf -> 0xfd-0xfe
+ *   0xc1-0xd1 -> 0xe1-0xf1, 0xd3-0xdb -> 0xf3-0xfb
+ * Note that 0xd2 maps to itself.
+ */
+static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267',
+ '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376',
+ '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Per-byte character-class bitmasks for ISO-8859-7, indexed by byte value
+ * (256 entries). Tested through ENC_IS_ISO_8859_7_CTYPE, and masked with
+ * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER by iso_8859_7_is_mbc_ambiguous.
+ * NOTE(review): the individual bit values come from the ONIGENC_CTYPE_*
+ * definitions, which are not part of this file; confirm there before
+ * editing entries.
+ */
+static const unsigned short EncISO_8859_7_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2,
+ 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000
+};
+
+/*
+ * Write the normalized (case-folded) form of the byte at *pp into *lower
+ * and advance *pp past it.
+ *
+ * An ASCII byte is folded only when `flag` has
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE set, a non-ASCII byte only when it has
+ * ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE set; otherwise the byte is copied
+ * unchanged. `end` is not consulted.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+iso_8859_7_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's half of the code space. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_ISO_8859_7_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Report whether the byte at *pp is a cased letter that takes part in
+ * case-insensitive matching under `flag`, and advance *pp past it.
+ *
+ * ASCII bytes are considered only when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is
+ * set, non-ASCII bytes only when ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE is
+ * set. `end` is not consulted.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters that have
+ * an upper-case counterpart, FALSE otherwise.
+ */
+static int
+iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_7_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /*
+     * Test the LOWER bit with '&'. The previous '|' made this condition
+     * always true, so digits, punctuation and other uncased bytes were
+     * reported as ambiguous and the final return was unreachable.
+     */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xc0 and 0xe0 are lower-case letters that cannot be converted. */
+      if (*p == 0xc0 || *p == 0xe0)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether `code` belongs to any of the character classes in `ctype`.
+ * Returns FALSE for every code point outside the single-byte range.
+ */
+static int
+iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table only has 256 entries; never index past it. */
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case-equivalent byte pairs for ISO-8859-7.
+ *
+ * `flag` must be exactly one ambiguity flag:
+ *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = OnigAsciiPairAmbigCodes,
+ *                                            returns 52
+ *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = the local table,
+ *                                            returns its entry count
+ * Any other value (including both flags combined) returns 0 and leaves
+ * *ccs untouched.
+ *
+ * NOTE(review): the pairs { 0xd2, 0xf2 } / { 0xf2, 0xd2 } have no matching
+ * entry in EncISO_8859_7_ToLowerCaseTable (0xd2 maps to itself there) and
+ * 0xd2 has ctype 0x0000; confirm whether these pairs are intended.
+ */
+static int
+iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  /* Each letter appears twice, once in each direction. */
+  static const OnigPairAmbigCodes cc[] = {
+    /* accented capitals -> small */
+    { 0xb6, 0xdc }, { 0xb8, 0xdd }, { 0xb9, 0xde }, { 0xba, 0xdf },
+    { 0xbc, 0xfc }, { 0xbe, 0xfd }, { 0xbf, 0xfe },
+
+    /* 0xc1-0xcf -> 0xe1-0xef */
+    { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 }, { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 }, { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb }, { 0xcc, 0xec },
+    { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    /* 0xd0-0xdb -> 0xf0-0xfb, then accented small -> capitals */
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xb6 }, { 0xdd, 0xb8 }, { 0xde, 0xb9 }, { 0xdf, 0xba },
+
+    /* 0xe1-0xef -> 0xc1-0xcf */
+    { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 }, { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 }, { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb }, { 0xec, 0xcc },
+    { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    /* 0xf0-0xfb -> 0xd0-0xdb, then accented small -> capitals */
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xbc }, { 0xfd, 0xbe }, { 0xfe, 0xbf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = cc;
+  return sizeof(cc) / sizeof(cc[0]);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-7: a single-byte encoding (min and max
+ * length are both 1) with both ASCII and non-ASCII case ambiguity enabled.
+ * Case normalization, ambiguity tests, the pair table and the ctype lookup
+ * are the iso_8859_7_* routines defined in this file; the remaining entries
+ * are generic single-byte helpers.
+ */
+OnigEncodingType OnigEncodingISO_8859_7 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-7", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_7_mbc_to_normalize,
+ iso_8859_7_is_mbc_ambiguous,
+ iso_8859_7_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_7_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c
new file mode 100644
index 0000000..e76966c
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_8.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * True when the ctype bitmask stored for byte `code` shares at least one bit
+ * with `ctype`. Both arguments are parenthesized so that a compound mask such
+ * as (A | B) is tested as a whole instead of being split by the higher
+ * precedence of `&`.
+ */
+#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
+  ((EncISO_8859_8_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Per-byte character-class bitmasks for ISO-8859-8, indexed by byte value
+ * (256 entries) and tested through ENC_IS_ISO_8859_8_CTYPE.
+ * NOTE(review): the bit meanings are presumably the ONIGENC_CTYPE_* flags
+ * from the Oniguruma headers; confirm there before editing entries.
+ */
+static const unsigned short EncISO_8859_8_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Report whether `code` belongs to any of the character classes in `ctype`.
+ * Returns FALSE for every code point outside the single-byte range.
+ */
+static int
+iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table only has 256 entries; never index past it. */
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-8: a single-byte encoding (min and max
+ * length are both 1). Only ASCII case ambiguity is enabled, so the generic
+ * onigenc_ascii_* case routines are used and the ctype lookup is the only
+ * encoding-specific entry.
+ */
+OnigEncodingType OnigEncodingISO_8859_8 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-8", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_8_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c
new file mode 100644
index 0000000..16a30c5
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_9.c
@@ -0,0 +1,270 @@
+/**********************************************************************
+ iso8859_9.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte `c`, looked up in EncISO_8859_9_ToLowerCaseTable. */
+#define ENC_ISO_8859_9_TO_LOWER_CASE(c) (EncISO_8859_9_ToLowerCaseTable[(c)])
+/*
+ * True when the ctype bitmask stored for byte `code` shares at least one bit
+ * with `ctype`. Both arguments are parenthesized so that a compound mask such
+ * as (A | B) is tested as a whole instead of being split by the higher
+ * precedence of `&`.
+ */
+#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
+  ((EncISO_8859_9_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Lower-case mapping for ISO-8859-9, indexed by byte value (256 entries).
+ * Entries equal their index except for:
+ *   0x41-0x5a -> 0x61-0x7a
+ *   0xc0-0xd6 -> 0xe0-0xf6
+ *   0xd8-0xdc -> 0xf8-0xfc, 0xde -> 0xfe
+ * Note that 0xd7, 0xdd and 0xdf map to themselves.
+ */
+static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Per-byte character-class bitmasks for ISO-8859-9, indexed by byte value
+ * (256 entries). Tested through ENC_IS_ISO_8859_9_CTYPE, and masked with
+ * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER by iso_8859_9_is_mbc_ambiguous.
+ * NOTE(review): the individual bit values come from the ONIGENC_CTYPE_*
+ * definitions, which are not part of this file; confirm there before
+ * editing entries.
+ */
+static const unsigned short EncISO_8859_9_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Write the normalized (case-folded) form of the byte at *pp into *lower
+ * and advance *pp past it.
+ *
+ * An ASCII byte is folded only when `flag` has
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE set, a non-ASCII byte only when it has
+ * ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE set; otherwise the byte is copied
+ * unchanged. `end` is not consulted.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's half of the code space. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_ISO_8859_9_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Report whether the byte at *pp is a cased letter that takes part in
+ * case-insensitive matching under `flag`, and advance *pp past it.
+ *
+ * ASCII bytes are considered only when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is
+ * set, non-ASCII bytes only when ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE is
+ * set. `end` is not consulted.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters that are
+ * not on the "can't convert" list below, FALSE otherwise.
+ */
+static int
+iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_9_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /*
+     * Test the LOWER bit with '&'. The previous '|' made this condition
+     * always true, so digits, punctuation and other uncased bytes were
+     * reported as ambiguous and the final return was unreachable.
+     */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf etc.. are lower case letter, but can't convert. */
+      if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether `code` belongs to any of the character classes in `ctype`.
+ * Returns FALSE for every code point outside the single-byte range.
+ */
+static int
+iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* The ctype table only has 256 entries; never index past it. */
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case-equivalent byte pairs for ISO-8859-9.
+ *
+ * `flag` must be exactly one ambiguity flag:
+ *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = OnigAsciiPairAmbigCodes,
+ *                                            returns 52
+ *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = the local table,
+ *                                            returns its entry count
+ * Any other value (including both flags combined) returns 0 and leaves
+ * *ccs untouched.
+ *
+ * NOTE(review): { 0xdd, 0xfd } / { 0xfd, 0xdd } are listed here although
+ * EncISO_8859_9_ToLowerCaseTable maps 0xdd to itself; confirm which of the
+ * two is intended.
+ */
+static int
+iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  /* Each letter appears twice, once in each direction. */
+  static const OnigPairAmbigCodes cc[] = {
+    /* 0xc0-0xcf -> 0xe0-0xef */
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    /* 0xd0-0xde -> 0xf0-0xfe, skipping 0xd7 */
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    /* 0xe0-0xef -> 0xc0-0xcf */
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    /* 0xf0-0xfe -> 0xd0-0xde, skipping 0xf7 */
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = cc;
+  return sizeof(cc) / sizeof(cc[0]);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-9: a single-byte encoding (min and max
+ * length are both 1) with both ASCII and non-ASCII case ambiguity enabled.
+ * Case normalization, ambiguity tests, the pair table and the ctype lookup
+ * are the iso_8859_9_* routines defined in this file. Unlike the other
+ * ISO-8859 descriptors here, the compound-ambiguity entry is
+ * onigenc_ess_tsett_get_all_comp_ambig_codes.
+ * NOTE(review): presumably that routine covers 0xdf (sharp s); confirm
+ * against its definition.
+ */
+OnigEncodingType OnigEncodingISO_8859_9 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-9", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_9_mbc_to_normalize,
+ iso_8859_9_is_mbc_ambiguous,
+ iso_8859_9_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_9_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c
new file mode 100644
index 0000000..d7277e8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/koi8.c
@@ -0,0 +1,264 @@
+/**********************************************************************
+ koi8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte c, looked up in EncKOI8_ToLowerCaseTable. */
+#define ENC_KOI8_TO_LOWER_CASE(c)        (EncKOI8_ToLowerCaseTable[(c)])
+/* Non-zero when the EncKOI8_CtypeTable entry for `code` has at least one of
+   the bits in `ctype`.  Both parameters are parenthesized so that a compound
+   argument such as (A | B) is masked as a whole instead of being split by
+   operator precedence after expansion. */
+#define ENC_IS_KOI8_CTYPE(code,ctype) \
+  ((EncKOI8_CtypeTable[(code)] & (ctype)) != 0)
+
+static const UChar EncKOI8_ToLowerCaseTable[256] = { /* byte -> lower-case byte: 0x41-0x5a -> 0x61-0x7a, 0xe0-0xff -> 0xc0-0xdf, every other byte maps to itself */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
+};
+
+static const unsigned short EncKOI8_CtypeTable[256] = { /* per-byte character-class bit mask, tested with ONIGENC_CTYPE_* bits via ENC_IS_KOI8_CTYPE; presumably emitted by enc/mktable.c -- confirm before hand-editing */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+};
+
+/* Writes the normalized form of the single byte at *pp into *lower,
+   advances *pp past that byte and returns 1 (the byte length written).
+   The byte is folded through ENC_KOI8_TO_LOWER_CASE only when `flag`
+   carries the bit matching its class (ASCII or non-ASCII); otherwise it
+   is copied unchanged.  `end` is not consulted. */
+static int
+koi8_mbc_to_normalize(OnigAmbigType flag,
+                      const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
+{
+  const OnigUChar* src = *pp;
+  OnigAmbigType wanted;
+
+  /* choose the flag bit that governs this byte's class */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    wanted = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    wanted = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_KOI8_TO_LOWER_CASE(*src) : *src;
+
+  (*pp)++;
+  return 1; /* return byte length of converted char to lower */
+}
+
+/* Advances *pp by one byte and returns TRUE when that byte is cased
+   (its ctype entry has the UPPER or LOWER bit) and `flag` enables case
+   ambiguity for the byte's class (ASCII or non-ASCII); FALSE otherwise.
+   `end` is not consulted. */
+static int
+koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
+{
+  const OnigUChar* cur = *pp;
+  OnigAmbigType wanted;
+
+  (*pp)++;
+
+  if (ONIGENC_IS_MBC_ASCII(cur))
+    wanted = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    wanted = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  /* the caller did not ask for this class of case folding */
+  if ((flag & wanted) == 0)
+    return FALSE;
+
+  if ((EncKOI8_CtypeTable[*cur] &
+       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0)
+    return TRUE;
+
+  return FALSE;
+}
+
+
+/* Tests whether `code` belongs to character class `ctype` according to
+   EncKOI8_CtypeTable.  Codes of 256 and above are outside the table and
+   always yield FALSE. */
+static int
+koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_KOI8_CTYPE(code, ctype);
+}
+
+/* Hands back the table of case-ambiguous byte pairs for KOI8.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+ *       *ccs = OnigAsciiPairAmbigCodes, returns 52
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
+ *       *ccs = the local table below, returns its entry count
+ *   any other value (the comparison is exact, so this includes both bits
+ *   set together)
+ *       *ccs is left untouched, returns 0
+ *
+ * The table lists every pair in both directions.  The reverse entry
+ * { 0xfd, 0xdd } used to be missing even though { 0xdd, 0xfd } was present
+ * and EncKOI8_ToLowerCaseTable folds 0xfd to 0xdd; it is now included so
+ * the two halves mirror each other.
+ */
+static int
+koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
+                              const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    /* 0xc0-0xdf (lower-case forms) -> 0xe0-0xff */
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }, { 0xdf, 0xff },
+
+    /* 0xe0-0xff -> 0xc0-0xdf */
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }, { 0xff, 0xdf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52; /* presumably 26 ASCII letters in both directions -- confirm against OnigAsciiPairAmbigCodes */
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return (int )(sizeof(cc) / sizeof(cc[0]));
+  }
+
+  return 0;
+}
+
+OnigEncodingType OnigEncodingKOI8 = { /* KOI8 encoding descriptor; members are initialized positionally, so their order must not change */
+ onigenc_single_byte_mbc_enc_len,
+ "KOI8", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* presumably the ambiguity kinds this encoding supports -- confirm against OnigEncodingType */
+ { /* meta characters: only the escape is set, the others are ONIG_INEFFECTIVE_META_CHAR */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ koi8_mbc_to_normalize,
+ koi8_is_mbc_ambiguous,
+ koi8_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes, /* NOTE(review): presumably reports no compound ambiguities -- confirm in regenc */
+ koi8_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c
new file mode 100644
index 0000000..1010f5f
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/koi8_r.c
@@ -0,0 +1,266 @@
+/**********************************************************************
+ koi8_r.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte c, looked up in EncKOI8_R_ToLowerCaseTable. */
+#define ENC_KOI8_R_TO_LOWER_CASE(c)        (EncKOI8_R_ToLowerCaseTable[(c)])
+/* Non-zero when the EncKOI8_R_CtypeTable entry for `code` has at least one
+   of the bits in `ctype`.  Both parameters are parenthesized so that a
+   compound argument such as (A | B) is masked as a whole instead of being
+   split by operator precedence after expansion. */
+#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
+  ((EncKOI8_R_CtypeTable[(code)] & (ctype)) != 0)
+
+static const UChar EncKOI8_R_ToLowerCaseTable[256] = { /* byte -> lower-case byte: 0x41-0x5a -> 0x61-0x7a, 0xb3 -> 0xa3, 0xe0-0xff -> 0xc0-0xdf, every other byte maps to itself */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
+};
+
+static const unsigned short EncKOI8_R_CtypeTable[256] = { /* per-byte character-class bit mask, tested with ONIGENC_CTYPE_* bits via ENC_IS_KOI8_R_CTYPE; presumably emitted by enc/mktable.c -- confirm before hand-editing */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+};
+
+/* Writes the normalized form of the single byte at *pp into *lower,
+   advances *pp past that byte and returns 1 (the byte length written).
+   The byte is folded through ENC_KOI8_R_TO_LOWER_CASE only when `flag`
+   carries the bit matching its class (ASCII or non-ASCII); otherwise it
+   is copied unchanged.  `end` is not consulted. */
+static int
+koi8_r_mbc_to_normalize(OnigAmbigType flag,
+                        const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  OnigAmbigType wanted;
+
+  /* choose the flag bit that governs this byte's class */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    wanted = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    wanted = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_KOI8_R_TO_LOWER_CASE(*src) : *src;
+
+  (*pp)++;
+  return 1; /* return byte length of converted char to lower */
+}
+
+/* Advances *pp by one byte and returns TRUE when that byte is cased
+   (its ctype entry has the UPPER or LOWER bit) and `flag` enables case
+   ambiguity for the byte's class (ASCII or non-ASCII); FALSE otherwise.
+   `end` is not consulted. */
+static int
+koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* cur = *pp;
+  OnigAmbigType wanted;
+
+  (*pp)++;
+
+  if (ONIGENC_IS_MBC_ASCII(cur))
+    wanted = ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE;
+  else
+    wanted = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  /* the caller did not ask for this class of case folding */
+  if ((flag & wanted) == 0)
+    return FALSE;
+
+  if ((EncKOI8_R_CtypeTable[*cur] &
+       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0)
+    return TRUE;
+
+  return FALSE;
+}
+
+/* Tests whether `code` belongs to character class `ctype` according to
+   EncKOI8_R_CtypeTable.  Codes of 256 and above are outside the table and
+   always yield FALSE. */
+static int
+koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_KOI8_R_CTYPE(code, ctype);
+}
+
+/* Hands back the table of case-ambiguous byte pairs for KOI8-R.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+ *       *ccs = OnigAsciiPairAmbigCodes, returns 52
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
+ *       *ccs = the local table below, returns its entry count
+ *   any other value (the comparison is exact, so this includes both bits
+ *   set together)
+ *       *ccs is left untouched, returns 0
+ *
+ * The table lists every pair in both directions.  The reverse entry
+ * { 0xfd, 0xdd } used to be missing even though { 0xdd, 0xfd } was present
+ * and EncKOI8_R_ToLowerCaseTable folds 0xfd to 0xdd; it is now included so
+ * the two halves mirror each other.
+ */
+static int
+koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    /* the extra KOI8-R letter pair outside the 0xc0-0xff block */
+    { 0xa3, 0xb3 }, { 0xb3, 0xa3 },
+
+    /* 0xc0-0xdf (lower-case forms) -> 0xe0-0xff */
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }, { 0xdf, 0xff },
+
+    /* 0xe0-0xff -> 0xc0-0xdf */
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }, { 0xff, 0xdf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52; /* presumably 26 ASCII letters in both directions -- confirm against OnigAsciiPairAmbigCodes */
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return (int )(sizeof(cc) / sizeof(cc[0]));
+  }
+
+  return 0;
+}
+
+OnigEncodingType OnigEncodingKOI8_R = { /* KOI8-R encoding descriptor; members are initialized positionally, so their order must not change */
+ onigenc_single_byte_mbc_enc_len,
+ "KOI8-R", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* presumably the ambiguity kinds this encoding supports -- confirm against OnigEncodingType */
+ { /* meta characters: only the escape is set, the others are ONIG_INEFFECTIVE_META_CHAR */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ koi8_r_mbc_to_normalize,
+ koi8_r_is_mbc_ambiguous,
+ koi8_r_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes, /* NOTE(review): presumably reports no compound ambiguities -- confirm in regenc */
+ koi8_r_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c
new file mode 100644
index 0000000..fcf0574
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/mktable.c
@@ -0,0 +1,1115 @@
+/**********************************************************************
+ mktable.c
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NOT_RUBY
+#include "regenc.h"
+
+#define UNICODE_ISO_8859_1 0 /* encoding ids: consecutive from 0 and used as the `enc` argument of the Is* predicates in this file */
+#define ISO_8859_1 1
+#define ISO_8859_2 2
+#define ISO_8859_3 3
+#define ISO_8859_4 4
+#define ISO_8859_5 5
+#define ISO_8859_6 6
+#define ISO_8859_7 7
+#define ISO_8859_8 8
+#define ISO_8859_9 9
+#define ISO_8859_10 10
+#define ISO_8859_11 11
+#define ISO_8859_13 12 /* no ISO_8859_12 id is defined, so from here on the id is one less than the part number */
+#define ISO_8859_14 13
+#define ISO_8859_15 14
+#define ISO_8859_16 15
+#define KOI8 16
+#define KOI8_R 17
+
+typedef struct { /* one row of the Info[] table: an encoding id paired with its name */
+ int num; /* encoding id (Info[] fills this from the id #defines above) */
+ char* name; /* encoding name; Info[] initializes it from a string literal */
+} ENC_INFO;
+
+static ENC_INFO Info[] = {
+ { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
+ { ISO_8859_1, "ISO_8859_1" },
+ { ISO_8859_2, "ISO_8859_2" },
+ { ISO_8859_3, "ISO_8859_3" },
+ { ISO_8859_4, "ISO_8859_4" },
+ { ISO_8859_5, "ISO_8859_5" },
+ { ISO_8859_6, "ISO_8859_6" },
+ { ISO_8859_7, "ISO_8859_7" },
+ { ISO_8859_8, "ISO_8859_8" },
+ { ISO_8859_9, "ISO_8859_9" },
+ { ISO_8859_10, "ISO_8859_10" },
+ { ISO_8859_11, "ISO_8859_11" },
+ { ISO_8859_13, "ISO_8859_13" },
+ { ISO_8859_14, "ISO_8859_14" },
+ { ISO_8859_15, "ISO_8859_15" },
+ { ISO_8859_16, "ISO_8859_16" },
+ { KOI8, "KOI8" },
+ { KOI8_R, "KOI8_R" }
+};
+
+
+static int IsAlpha(int enc, int c) /* returns 1 if byte c is alphabetic in encoding enc, 0 otherwise */
+{
+ if (c >= 0x41 && c <= 0x5a) return 1; /* ASCII A-Z, accepted before enc is even looked at */
+ if (c >= 0x61 && c <= 0x7a) return 1; /* ASCII a-z */
+
+ switch (enc) { /* per-encoding letters above the ASCII range */
+ case UNICODE_ISO_8859_1:
+ case ISO_8859_1:
+ case ISO_8859_9:
+ if (c == 0xaa) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_2:
+ if (c == 0xa1 || c == 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c >= 0xae && c <= 0xaf) return 1;
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_3:
+ if (c == 0xa1) return 1;
+ if (c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb1) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xc2) return 1;
+ if (c >= 0xc4 && c <= 0xcf) return 1;
+ if (c >= 0xd1 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xe2) return 1;
+ if (c >= 0xe4 && c <= 0xef) return 1;
+ if (c >= 0xf1 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_4:
+ if (c >= 0xa1 && c <= 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_5:
+ if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
+ if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
+ break;
+
+ case ISO_8859_6:
+ if (c >= 0xc1 && c <= 0xda) return 1;
+ if (c >= 0xe0 && c <= 0xf2) return 1;
+ break;
+
+ case ISO_8859_7:
+ if (c == 0xb6) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c == 0xc0) return 1;
+ if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
+ if (c >= 0xdc && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_8:
+ if (c == 0xb5) return 1;
+ if (c >= 0xe0 && c <= 0xfa) return 1;
+ break;
+
+ case ISO_8859_10:
+ if (c >= 0xa1 && c <= 0xa6) return 1;
+ if (c >= 0xa8 && c <= 0xac) return 1;
+ if (c == 0xae || c == 0xaf) return 1;
+ if (c >= 0xb1 && c <= 0xb6) return 1;
+ if (c >= 0xb8 && c <= 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_11:
+ if (c >= 0xa1 && c <= 0xda) return 1;
+ if (c >= 0xdf && c <= 0xfb) return 1;
+ break;
+
+ case ISO_8859_13:
+ if (c == 0xa8) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xb8) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xbf && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_14:
+ if (c == 0xa1 || c == 0xa2) return 1;
+ if (c == 0xa4 || c == 0xa5) return 1;
+ if (c == 0xa6 || c == 0xa8) return 1;
+ if (c >= 0xaa && c <= 0xac) return 1;
+ if (c >= 0xaf && c <= 0xb5) return 1;
+ if (c >= 0xb7 && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_15:
+ if (c == 0xaa) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ if (c == 0xa6) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xb4) return 1;
+ if (c == 0xb8) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbd) return 1;
+ if (c == 0xbe) return 1;
+ break;
+
+ case ISO_8859_16:
+ if (c == 0xa1) return 1;
+ if (c == 0xa2) return 1;
+ if (c == 0xa3) return 1;
+ if (c == 0xa6) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xac) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb2) return 1;
+ if (c == 0xb3) return 1;
+ if (c == 0xb4) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbd) return 1;
+ if (c == 0xbe) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xde) return 1;
+ if (c >= 0xdf && c <= 0xff) return 1;
+ break;
+
+ case KOI8_R:
+ if (c == 0xa3 || c == 0xb3) return 1; /* the two letters KOI8_R has in addition to KOI8's range */
+ /* fall */
+ case KOI8:
+ if (c >= 0xc0 && c <= 0xff) return 1;
+ break;
+
+ default:
+ exit(-1); /* unknown encoding id: terminate the program */
+ }
+
+ return 0; /* known encoding, but c matched none of its letter ranges */
+}
+
+/* Reports whether byte c is a blank in encoding enc: 1 if so, 0 if not.
+   TAB (0x09) and SPACE (0x20) are blank in every encoding and are accepted
+   before enc is examined; each known encoding then contributes exactly one
+   additional blank byte.  An unknown enc terminates the program. */
+static int IsBlank(int enc, int c)
+{
+  int extra_blank;
+
+  if (c == 0x09 || c == 0x20) return 1;
+
+  switch (enc) {
+  case KOI8_R:
+    extra_blank = 0x9a;
+    break;
+
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_3:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_6:
+  case ISO_8859_7:
+  case ISO_8859_8:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_11:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+  case KOI8:
+    extra_blank = 0xa0;
+    break;
+
+  default:
+    exit(-1);
+  }
+
+  return (c == extra_blank) ? 1 : 0;
+}
+
+/* Reports whether byte c is a control character in encoding enc: 1 if so,
+   0 if not.  0x00-0x1f are control characters everywhere and are accepted
+   before enc is examined.  Beyond that, KOI8_R adds only 0x7f, every other
+   known encoding adds 0x7f-0x9f, and UNICODE_ISO_8859_1 additionally adds
+   0xad.  An unknown enc terminates the program. */
+static int IsCntrl(int enc, int c)
+{
+  int upper_bound;  /* last byte of the control range that starts at 0x7f */
+
+  if (c >= 0x00 && c <= 0x1F) return 1;
+
+  switch (enc) {
+  case KOI8_R:
+    upper_bound = 0x7f;
+    break;
+
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_3:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_6:
+  case ISO_8859_7:
+  case ISO_8859_8:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_11:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+  case KOI8:
+    upper_bound = 0x9F;
+    break;
+
+  default:
+    exit(-1);
+  }
+
+  if (enc == UNICODE_ISO_8859_1 && c == 0xad) return 1;
+
+  return (c >= 0x7f && c <= upper_bound) ? 1 : 0;
+}
+
+/* Reports whether byte c is a decimal digit: 1 for 0x30-0x39, else 0.
+   The answer is the same for every encoding; enc is not consulted. */
+static int IsDigit(int enc, int c)
+{
+  return (c >= 0x30 && c <= 0x39) ? 1 : 0;
+}
+
+/* Reports whether byte c is a graphic (visible) character in encoding enc:
+   1 if so, 0 if not.  ASCII 0x21-0x7e is graphic everywhere and is accepted
+   before enc is examined; the switch then decides the upper half per
+   encoding.  An unknown enc terminates the program. */
+static int IsGraph(int enc, int c)
+{
+  int graph = 0;
+
+  if (c >= 0x21 && c <= 0x7e) return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+    graph = (c >= 0xa1 && c <= 0xff);
+    break;
+
+  case ISO_8859_3:
+    /* everything from 0xa1 upward except seven excluded bytes */
+    graph = (c >= 0xa1) &&
+            !(c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 ||
+              c == 0xd0 || c == 0xe3 || c == 0xf0);
+    break;
+
+  case ISO_8859_6:
+    graph = (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) ||
+            (c >= 0xc1 && c <= 0xda) ||
+            (c >= 0xe0 && c <= 0xf2);
+    break;
+
+  case ISO_8859_7:
+    graph = (c >= 0xa1 && c <= 0xfe) &&
+            !(c == 0xa4 || c == 0xa5 || c == 0xaa ||
+              c == 0xae || c == 0xd2);
+    break;
+
+  case ISO_8859_8:
+    /* 0xa2-0xfa with the 0xbf-0xde stretch carved out */
+    graph = (c >= 0xa2 && c <= 0xfa) && !(c >= 0xbf && c <= 0xde);
+    break;
+
+  case ISO_8859_11:
+    graph = (c >= 0xa1 && c <= 0xda) || (c >= 0xdf && c <= 0xfb);
+    break;
+
+  case KOI8:
+    graph = (c >= 0xc0 && c <= 0xff);
+    break;
+
+  case KOI8_R:
+    graph = (c >= 0x80 && c <= 0xff && c != 0x9a);
+    break;
+
+  default:
+    exit(-1);
+  }
+
+  return graph;
+}
+
+static int IsLower(int enc, int c) /* returns 1 if byte c is a lower-case letter in encoding enc, 0 otherwise */
+{
+ if (c >= 0x61 && c <= 0x7a) return 1; /* ASCII a-z, accepted before enc is even looked at */
+
+ switch (enc) { /* per-encoding lower-case letters above the ASCII range */
+ case UNICODE_ISO_8859_1:
+ case ISO_8859_1:
+ case ISO_8859_9:
+ if (c == 0xaa) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xdf && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_2:
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c >= 0xdf && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_3:
+ if (c == 0xb1) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c == 0xbf) return 1;
+ if (c == 0xdf) return 1;
+ if (c >= 0xe0 && c <= 0xe2) return 1;
+ if (c >= 0xe4 && c <= 0xef) return 1;
+ if (c >= 0xf1 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_4:
+ if (c == 0xa2) return 1;
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c == 0xdf) return 1;
+ if (c >= 0xe0 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_5:
+ if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
+ break;
+
+ case ISO_8859_6: /* no lower-case bytes beyond ASCII are listed for this encoding */
+ break;
+
+ case ISO_8859_7:
+ if (c == 0xc0) return 1;
+ if (c >= 0xdc && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_8:
+ if (c == 0xb5) return 1;
+ break;
+
+ case ISO_8859_10:
+ if (c >= 0xb1 && c <= 0xb6) return 1;
+ if (c >= 0xb8 && c <= 0xbc) return 1;
+ if (c == 0xbe || c == 0xbf) return 1;
+ if (c >= 0xdf && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_11: /* no lower-case bytes beyond ASCII are listed for this encoding */
+ break;
+
+ case ISO_8859_13:
+ if (c == 0xb5) return 1;
+ if (c == 0xb8) return 1;
+ if (c == 0xba) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xdf && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_14:
+ if (c == 0xa2) return 1;
+ if (c == 0xa5) return 1;
+ if (c == 0xab) return 1;
+ if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbe || c == 0xbf) return 1;
+ if (c >= 0xdf && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_15:
+ if (c == 0xaa) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xdf && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xb8) return 1;
+ if (c == 0xbd) return 1;
+ break;
+
+ case ISO_8859_16:
+ if (c == 0xa2) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xb3) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbd) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xdf && c <= 0xff) return 1;
+ break;
+
+ case KOI8_R:
+ if (c == 0xa3) return 1; /* the one lower-case byte KOI8_R has in addition to KOI8's range */
+ /* fall */
+ case KOI8:
+ if (c >= 0xc0 && c <= 0xdf) return 1;
+ break;
+
+ default:
+ exit(-1); /* unknown encoding id: terminate the program */
+ }
+
+ return 0; /* known encoding, but c matched none of its lower-case ranges */
+}
+
+/* Tell whether byte value c belongs to the PRINT class of encoding enc.
+ * ASCII 0x20-0x7e is accepted before enc is examined; for any other byte
+ * an unrecognized enc terminates the program through exit(-1).
+ * Returns 1 when c is classified as printable, 0 otherwise. */
+static int IsPrint(int enc, int c)
+{
+  if (0x20 <= c && c <= 0x7e) return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+    if ((c >= 0x09 && c <= 0x0d) || c == 0x85) return 1;
+    /* fall */
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+    return (c >= 0xa0 && c <= 0xff);
+
+  case ISO_8859_3:
+    /* everything from 0xa0 up except the listed holes */
+    return (c >= 0xa0 &&
+            c != 0xa5 && c != 0xae && c != 0xbe && c != 0xc3 &&
+            c != 0xd0 && c != 0xe3 && c != 0xf0);
+
+  case ISO_8859_6:
+    return (c == 0xa0 ||
+            c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf ||
+            (c >= 0xc1 && c <= 0xda) ||
+            (c >= 0xe0 && c <= 0xf2));
+
+  case ISO_8859_7:
+    return (c >= 0xa0 && c <= 0xfe &&
+            c != 0xa4 && c != 0xa5 && c != 0xaa &&
+            c != 0xae && c != 0xd2);
+
+  case ISO_8859_8:
+    /* 0xa0-0xfa minus 0xa1 and the 0xbf-0xde gap */
+    return (c >= 0xa0 && c <= 0xfa &&
+            !(c >= 0xbf && c <= 0xde) &&
+            c != 0xa1);
+
+  case ISO_8859_11:
+    return ((c >= 0xa0 && c <= 0xda) ||
+            (c >= 0xdf && c <= 0xfb));
+
+  case KOI8:
+    return (c == 0xa0 || (c >= 0xc0 && c <= 0xff));
+
+  case KOI8_R:
+    return (c >= 0x80 && c <= 0xff);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/* Tell whether byte value c belongs to the PUNCT class of encoding enc.
+ * The ASCII punctuation ranges are accepted before the per-encoding
+ * switch; for any other byte an unrecognized enc terminates the program
+ * through exit(-1).
+ * Returns 1 when c is classified as punctuation, 0 otherwise. */
+static int IsPunct(int enc, int c)
+{
+  if (enc == UNICODE_ISO_8859_1) {
+    /* NOTE(review): every value here is also covered by the ASCII ranges
+       tested right below, so this pre-check does not change the result. */
+    switch (c) {
+    case 0x24: case 0x2b: case 0x5e: case 0x60:
+    case 0x7c: case 0x7e:
+    case 0x3c: case 0x3d: case 0x3e:
+      return 1;
+    default:
+      break;
+    }
+  }
+
+  if ((c >= 0x21 && c <= 0x2f) ||
+      (c >= 0x3a && c <= 0x40) ||
+      (c >= 0x5b && c <= 0x60) ||
+      (c >= 0x7b && c <= 0x7e))
+    return 1;
+
+  switch (enc) {
+  case ISO_8859_1:
+  case ISO_8859_9:
+  case ISO_8859_15:
+    if (c == 0xad) return 1;
+    /* fall */
+  case UNICODE_ISO_8859_1:
+    return (c == 0xa1 || c == 0xab || c == 0xb7 ||
+            c == 0xbb || c == 0xbf);
+
+  case ISO_8859_2:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_14:
+    return (c == 0xad);
+
+  case ISO_8859_3:
+  case ISO_8859_10:
+    return (c == 0xad || c == 0xb7 || c == 0xbd);
+
+  case ISO_8859_6:
+    return (c == 0xac || c == 0xad || c == 0xbb || c == 0xbf);
+
+  case ISO_8859_7:
+    return (c == 0xa1 || c == 0xa2 ||
+            c == 0xab || c == 0xaf || c == 0xad ||
+            c == 0xb7 || c == 0xbb);
+
+  case ISO_8859_8:
+    return (c == 0xab || c == 0xad || c == 0xb7 ||
+            c == 0xbb || c == 0xdf);
+
+  case ISO_8859_13:
+    return (c == 0xa1 || c == 0xa5 ||
+            c == 0xab || c == 0xad ||
+            c == 0xb4 || c == 0xb7 ||
+            c == 0xbb || c == 0xff);
+
+  case ISO_8859_16:
+    return (c == 0xa5 || c == 0xab || c == 0xad ||
+            c == 0xb5 || c == 0xb7 || c == 0xbb);
+
+  case KOI8_R:
+    return (c == 0x9e);
+
+  case ISO_8859_11:
+  case KOI8:
+    /* no punctuation above ASCII is listed for these encodings */
+    return 0;
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/* Tell whether byte value c belongs to the SPACE class of encoding enc.
+ * 0x09-0x0d and 0x20 are accepted before enc is examined; for any other
+ * byte an unrecognized enc terminates the program through exit(-1).
+ * Returns 1 when c is classified as white space, 0 otherwise. */
+static int IsSpace(int enc, int c)
+{
+  if ((c >= 0x09 && c <= 0x0d) || c == 0x20) return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+    if (c == 0x85) return 1;
+    /* fall */
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_3:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_6:
+  case ISO_8859_7:
+  case ISO_8859_8:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_11:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+  case KOI8:
+    return (c == 0xa0);
+
+  case KOI8_R:
+    return (c == 0x9a);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/* Tell whether byte value c belongs to the UPPER class of encoding enc.
+ * ASCII 0x41-0x5a is accepted before enc is examined; for any other byte
+ * an unrecognized enc terminates the program through exit(-1).
+ * Returns 1 when c is classified as upper case, 0 otherwise. */
+static int IsUpper(int enc, int c)
+{
+  if (0x41 <= c && c <= 0x5a) return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_9:
+    return ((c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde));
+
+  case ISO_8859_2:
+    return (c == 0xa1 || c == 0xa3 || c == 0xa5 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            (c >= 0xae && c <= 0xaf) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde));
+
+  case ISO_8859_3:
+    return (c == 0xa1 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xaf ||
+            (c >= 0xc0 && c <= 0xc2) ||
+            (c >= 0xc4 && c <= 0xcf) ||
+            (c >= 0xd1 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde));
+
+  case ISO_8859_4:
+    return (c == 0xa1 || c == 0xa3 || c == 0xa5 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xae || c == 0xbd ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde));
+
+  case ISO_8859_5:
+    return (c >= 0xa1 && c <= 0xcf && c != 0xad);
+
+  case ISO_8859_6:
+  case ISO_8859_8:
+  case ISO_8859_11:
+    /* no byte above ASCII is marked upper case for these encodings */
+    return 0;
+
+  case ISO_8859_7:
+    return (c == 0xb6 ||
+            (c >= 0xb8 && c <= 0xba) ||
+            c == 0xbc ||
+            (c >= 0xbe && c <= 0xbf) ||
+            (c >= 0xc1 && c <= 0xdb && c != 0xd2));
+
+  case ISO_8859_10:
+    return ((c >= 0xa1 && c <= 0xa6) ||
+            (c >= 0xa8 && c <= 0xac) ||
+            c == 0xae || c == 0xaf ||
+            (c >= 0xc0 && c <= 0xde));
+
+  case ISO_8859_13:
+    return (c == 0xa8 || c == 0xaa || c == 0xaf ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde));
+
+  case ISO_8859_14:
+    return (c == 0xa1 || c == 0xa4 || c == 0xa6 || c == 0xa8 ||
+            c == 0xaa || c == 0xac || c == 0xaf || c == 0xb0 ||
+            c == 0xb2 || c == 0xb4 || c == 0xb7 ||
+            c == 0xbb || c == 0xbd ||
+            (c >= 0xc0 && c <= 0xde));
+
+  case ISO_8859_15:
+    return ((c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xde) ||
+            c == 0xa6 || c == 0xb4 || c == 0xbc || c == 0xbe);
+
+  case ISO_8859_16:
+    return (c == 0xa1 || c == 0xa3 || c == 0xa6 || c == 0xaa ||
+            c == 0xac || c == 0xaf || c == 0xb2 || c == 0xb4 ||
+            c == 0xbc || c == 0xbe ||
+            (c >= 0xc0 && c <= 0xde));
+
+  case KOI8_R:
+    if (c == 0xb3) return 1;
+    /* fall */
+  case KOI8:
+    return (c >= 0xe0 && c <= 0xff);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/* Tell whether byte value c is an ASCII hexadecimal digit
+ * (0x30-0x39, 0x41-0x46 or 0x61-0x66).  enc is not consulted.
+ * Returns 1 for a hex digit, 0 otherwise. */
+static int IsXDigit(int enc, int c)
+{
+  return ((c >= 0x30 && c <= 0x39) ||
+          (c >= 0x41 && c <= 0x46) ||
+          (c >= 0x61 && c <= 0x66));
+}
+
+/* Tell whether byte value c belongs to the WORD class of encoding enc.
+ * ASCII digits, letters and 0x5f are accepted before enc is examined;
+ * for any other byte an unrecognized enc terminates the program through
+ * exit(-1).
+ * Returns 1 when c is classified as a word character, 0 otherwise. */
+static int IsWord(int enc, int c)
+{
+  if ((c >= 0x30 && c <= 0x39) ||
+      (c >= 0x41 && c <= 0x5a) ||
+      c == 0x5f ||
+      (c >= 0x61 && c <= 0x7a))
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_9:
+    return (c == 0xaa ||
+            (c >= 0xb2 && c <= 0xb3) ||
+            c == 0xb5 ||
+            (c >= 0xb9 && c <= 0xba) ||
+            (c >= 0xbc && c <= 0xbe) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff));
+
+  case ISO_8859_2:
+    return (c == 0xa1 || c == 0xa3 || c == 0xa5 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            (c >= 0xae && c <= 0xaf) ||
+            c == 0xb1 || c == 0xb3 || c == 0xb5 || c == 0xb6 ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            (c >= 0xbe && c <= 0xbf) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_3:
+    return (c == 0xa1 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xaf ||
+            (c >= 0xb1 && c <= 0xb3) ||
+            c == 0xb5 || c == 0xb6 ||
+            (c >= 0xb9 && c <= 0xbd) ||
+            c == 0xbf ||
+            (c >= 0xc0 && c <= 0xc2) ||
+            (c >= 0xc4 && c <= 0xcf) ||
+            (c >= 0xd1 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xe2) ||
+            (c >= 0xe4 && c <= 0xef) ||
+            (c >= 0xf1 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_4:
+    return ((c >= 0xa1 && c <= 0xa3) ||
+            c == 0xa5 || c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xae ||
+            c == 0xb1 || c == 0xb3 || c == 0xb5 || c == 0xb6 ||
+            (c >= 0xb9 && c <= 0xbf) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_5:
+    return ((c >= 0xa1 && c <= 0xcf && c != 0xad) ||
+            (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd));
+
+  case ISO_8859_6:
+    return ((c >= 0xc1 && c <= 0xda) ||
+            (c >= 0xe0 && c <= 0xea) ||
+            (c >= 0xeb && c <= 0xf2));
+
+  case ISO_8859_7:
+    return (c == 0xb2 || c == 0xb3 || c == 0xb6 ||
+            (c >= 0xb8 && c <= 0xba) ||
+            (c >= 0xbc && c <= 0xbf) ||
+            c == 0xc0 ||
+            (c >= 0xc1 && c <= 0xdb && c != 0xd2) ||
+            (c >= 0xdc && c <= 0xfe));
+
+  case ISO_8859_8:
+    return (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9 ||
+            (c >= 0xbc && c <= 0xbe) ||
+            (c >= 0xe0 && c <= 0xfa));
+
+  case ISO_8859_10:
+    /* all of 0xa1-0xff except the listed holes */
+    return (c >= 0xa1 && c <= 0xff &&
+            c != 0xa7 && c != 0xad && c != 0xb0 &&
+            c != 0xb7 && c != 0xbd);
+
+  case ISO_8859_11:
+    return ((c >= 0xa1 && c <= 0xda) ||
+            (c >= 0xdf && c <= 0xfb));
+
+  case ISO_8859_13:
+    return (c == 0xa8 || c == 0xaa || c == 0xaf ||
+            c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9 ||
+            (c >= 0xbc && c <= 0xbe) ||
+            c == 0xb8 || c == 0xba ||
+            (c >= 0xbf && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_14:
+    /* all of 0xa1-0xff except the listed holes */
+    return (c >= 0xa1 && c <= 0xff &&
+            c != 0xa3 && c != 0xa7 && c != 0xa9 &&
+            c != 0xad && c != 0xae && c != 0xb6);
+
+  case ISO_8859_15:
+    return (c == 0xaa ||
+            (c >= 0xb2 && c <= 0xb3) ||
+            c == 0xb5 ||
+            (c >= 0xb9 && c <= 0xba) ||
+            (c >= 0xbc && c <= 0xbe) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff) ||
+            c == 0xa6 || c == 0xa8 || c == 0xb4 || c == 0xb8);
+
+  case ISO_8859_16:
+    return ((c >= 0xa1 && c <= 0xa3) ||
+            c == 0xa6 || c == 0xa8 || c == 0xaa || c == 0xac ||
+            c == 0xae || c == 0xaf ||
+            (c >= 0xb2 && c <= 0xb4) ||
+            (c >= 0xb8 && c <= 0xba) ||
+            (c >= 0xbc && c <= 0xbf) ||
+            (c >= 0xc0 && c <= 0xff));
+
+  case KOI8_R:
+    if (c == 0x9d || c == 0xa3 || c == 0xb3) return 1;
+    /* fall */
+  case KOI8:
+    return (c >= 0xc0 && c <= 0xff);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/* Tell whether byte value c lies in the 7-bit range 0x00-0x7f.
+ * enc is not consulted.  Returns 1 when in range, 0 otherwise. */
+static int IsAscii(int enc, int c)
+{
+  return (0x00 <= c && c <= 0x7f);
+}
+
+/* Tell whether byte value c is the line feed 0x0a.
+ * enc is not consulted.  Returns 1 for 0x0a, 0 otherwise. */
+static int IsNewline(int enc, int c)
+{
+  return (c == 0x0a);
+}
+
+/* Print the C source text of a 256-entry ctype table for one encoding.
+ *   fp    - stream that receives the generated text
+ *   einfo - supplies the encoding number (num) passed to the Is*()
+ *           predicates and the name embedded in the array identifier
+ * For each byte value 0..255 the ONIGENC_CTYPE_* bits of every predicate
+ * that accepts it are OR-ed together and printed as 4-digit hex, NCOL
+ * values per output line.  Always returns 0. */
+static int exec(FILE* fp, ENC_INFO* einfo)
+{
+#define NCOL 8
+
+  const int enc = einfo->num;
+  int code;
+
+  fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
+          einfo->name);
+
+  for (code = 0; code < 256; code++) {
+    int bits = 0;
+
+    bits |= IsNewline(enc, code) ? ONIGENC_CTYPE_NEWLINE : 0;
+    bits |= IsAlpha  (enc, code) ? ONIGENC_CTYPE_ALPHA   : 0;
+    bits |= IsBlank  (enc, code) ? ONIGENC_CTYPE_BLANK   : 0;
+    bits |= IsCntrl  (enc, code) ? ONIGENC_CTYPE_CNTRL   : 0;
+    bits |= IsDigit  (enc, code) ? ONIGENC_CTYPE_DIGIT   : 0;
+    bits |= IsGraph  (enc, code) ? ONIGENC_CTYPE_GRAPH   : 0;
+    bits |= IsLower  (enc, code) ? ONIGENC_CTYPE_LOWER   : 0;
+    bits |= IsPrint  (enc, code) ? ONIGENC_CTYPE_PRINT   : 0;
+    bits |= IsPunct  (enc, code) ? ONIGENC_CTYPE_PUNCT   : 0;
+    bits |= IsSpace  (enc, code) ? ONIGENC_CTYPE_SPACE   : 0;
+    bits |= IsUpper  (enc, code) ? ONIGENC_CTYPE_UPPER   : 0;
+    bits |= IsXDigit (enc, code) ? ONIGENC_CTYPE_XDIGIT  : 0;
+    bits |= IsWord   (enc, code) ? ONIGENC_CTYPE_WORD    : 0;
+    bits |= IsAscii  (enc, code) ? ONIGENC_CTYPE_ASCII   : 0;
+
+    /* leading pad at the start of each output row */
+    if (code % NCOL == 0) fputs(" ", fp);
+    fprintf(fp, "0x%04x", bits);
+    /* every entry but the last is followed by a comma */
+    if (code != 255) fputs(",", fp);
+    /* end the row after NCOL entries, otherwise separate with a blank */
+    fputs((code != 0 && code % NCOL == (NCOL-1)) ? "\n" : " ", fp);
+  }
+  fprintf(fp, "};\n");
+  return 0;
+}
+
+/* Program entry point: write the ctype table of every encoding listed in
+ * Info to stdout by calling exec() once per entry.
+ * argc and argv are not used.  Returns 0. */
+extern int main(int argc, char* argv[])
+{
+  /* element count as int, so the loop does not compare int with size_t */
+  const int count = (int )(sizeof(Info) / sizeof(Info[0]));
+  FILE* fp = stdout;
+  int i;
+
+  for (i = 0; i < count; i++) {
+    exec(fp, &Info[i]);
+  }
+
+  /* Explicit status: reaching the closing brace of main() only yields a
+     defined exit status of 0 from C99 on. */
+  return 0;
+}
diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c
new file mode 100644
index 0000000..f7d7d52
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/sjis.c
@@ -0,0 +1,238 @@
+/**********************************************************************
+ sjis.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_SJIS[] = { /* character byte length indexed by first byte value: 2 for 0x81-0x9f and 0xe0-0xfc, 1 for every other byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
+};
+
+static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { /* indexed by byte value: 1 for 0x40-0x7e and 0x80-0xfc (bytes accepted by SJIS_ISMB_TRAIL), 0 otherwise */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
+};
+
+/* True when `byte` starts a two-byte character (its EncLen_SJIS entry is > 1). */
+#define SJIS_ISMB_FIRST(byte)  (EncLen_SJIS[(byte)] > 1)
+/* Non-zero when `byte` is flagged in SJIS_CAN_BE_TRAIL_TABLE. */
+#define SJIS_ISMB_TRAIL(byte)  (SJIS_CAN_BE_TRAIL_TABLE[(byte)])
+
+/* Return the byte length of the character starting at p, looked up in
+ * EncLen_SJIS by its first byte. */
+static int
+sjis_mbc_enc_len(const UChar* p)
+{
+  const UChar lead = *p;
+
+  return EncLen_SJIS[lead];
+}
+
+/* Return the number of bytes needed to encode code:
+ *   - below 256: 1 when EncLen_SJIS marks that byte as a one-byte
+ *     character, otherwise 0;
+ *   - 256..0xffff: 2;
+ *   - above 0xffff: 0. */
+static int
+sjis_code_to_mbclen(OnigCodePoint code)
+{
+  if (code >= 256) {
+    return (code <= 0xffff) ? 2 : 0;
+  }
+
+  /* a lead byte on its own is not a complete character */
+  return (EncLen_SJIS[(int )code] == 1) ? 1 : 0;
+}
+
+/* Decode the character starting at p into a code value.
+ * The first byte is always read; each following byte of the character
+ * (length taken from enc_len()) is appended as the next lower 8 bits.
+ * Reading stops early once p reaches end, in which case the value built
+ * from the bytes read so far is returned. */
+static OnigCodePoint
+sjis_mbc_to_code(const UChar* p, const UChar* end)
+{
+  int remaining = enc_len(ONIG_ENCODING_SJIS, p) - 1;
+  OnigCodePoint code = (OnigCodePoint )*p++;
+
+  while (remaining > 0 && p < end) {
+    code = (code << 8) + *p++;
+    remaining--;
+  }
+  return code;
+}
+
+/* Store code into buf as bytes and return how many were written.
+ * Bits 8-15 are written first, but only when they are non-zero; the low
+ * 8 bits are always written.  The result is therefore 1 or 2. */
+static int
+sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  const UChar high = (UChar )((code >> 8) & 0xff);
+  int n = 0;
+
+  if (high != 0) buf[n++] = high;
+  buf[n++] = (UChar )(code & 0xff);
+
+#if 0
+  if (enc_len(ONIG_ENCODING_SJIS, buf) != n)
+    return REGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+  return n;
+}
+
+/* Copy the character at *pp into lower in normalized form, advance *pp
+ * past it and return the number of bytes stored.
+ * An ASCII byte is lower-cased only when flag contains
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE; any other character is copied
+ * unchanged (the copy is skipped when lower already points at it). */
+static int
+sjis_mbc_to_normalize(OnigAmbigType flag,
+                      const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    const int fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+
+    *lower = fold ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src) : *src;
+    *pp = src + 1;
+    return 1;
+  }
+
+  len = enc_len(ONIG_ENCODING_SJIS, src);
+  if (lower != src) {
+    for (i = 0; i < len; i++)
+      lower[i] = src[i];
+  }
+  *pp = src + len;
+  return len; /* return byte length of converted char to lower */
+}
+
+/* Forward to the generic onigenc_mbn_is_mbc_ambiguous() helper with
+ * ONIG_ENCODING_SJIS as the encoding and return its result. */
+static int
+sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const int ambiguous =
+    onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
+
+  return ambiguous;
+}
+
+/* Tell whether code belongs to the character class(es) in ctype.
+ * Codes below 128 are answered by ONIGENC_IS_ASCII_CODE_CTYPE().
+ * Any other code is accepted only when ctype includes WORD, GRAPH or
+ * PRINT and sjis_code_to_mbclen() reports more than one byte for it. */
+static int
+sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & (ONIGENC_CTYPE_WORD |
+                ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) == 0)
+    return FALSE;
+
+  if (sjis_code_to_mbclen(code) > 1)
+    return TRUE;
+
+  return FALSE;
+}
+
+/* Return a pointer, not after s, to the start of a character within
+ * [start, s].  s itself is returned when s <= start.
+ * When the byte at s can be a trail byte, the scan first steps back over
+ * the run of lead bytes immediately preceding s; from that position the
+ * character containing s is located by length. */
+static UChar*
+sjis_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar *head = s;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+
+  if (SJIS_ISMB_TRAIL(*head)) {
+    /* back up while the preceding byte is a lead byte */
+    while (head > start && SJIS_ISMB_FIRST(head[-1]))
+      head--;
+  }
+
+  len = enc_len(ONIG_ENCODING_SJIS, head);
+  if (head + len > s) return (UChar* )head;
+
+  /* skip that character, then advance by an even number of bytes --
+     presumably whole two-byte characters; TODO confirm */
+  head += len;
+  return (UChar* )(head + ((s - head) & ~1));
+}
+
+/* Return FALSE when the byte at s is flagged as a possible trail byte,
+ * TRUE otherwise.  end is not used. */
+static int
+sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  if (SJIS_ISMB_TRAIL(*s))
+    return FALSE;
+
+  return TRUE;
+}
+
+OnigEncodingType OnigEncodingSJIS = { /* Shift_JIS encoding descriptor: the sjis_* entries are the functions defined in this file, the onigenc_* entries are helpers defined elsewhere */
+ sjis_mbc_enc_len,
+ "Shift_JIS", /* name */
+ 2, /* max byte length */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ sjis_mbc_to_code,
+ sjis_code_to_mbclen,
+ sjis_code_to_mbc,
+ sjis_mbc_to_normalize,
+ sjis_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ sjis_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ sjis_left_adjust_char_head,
+ sjis_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c
new file mode 100644
index 0000000..a8cf539
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/unicode.c
@@ -0,0 +1,3403 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { /* one 16-bit flag word per value 0x00-0xff; NOTE(review): layout matches the tables printed by exec() in enc/mktable.c (OR-ed ONIGENC_CTYPE_* bits), presumably generated by it -- confirm */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+static const OnigCodePoint CRAlnum[] = { /* code ranges of the Alnum class: a leading count (414 with USE_UNICODE_FULL_RANGE_CTYPE, otherwise 9 for the pairs up to 0x0236) followed by ascending (first, last) code point pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 414,
+#else
+ 9,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlnum */
+
+static const OnigCodePoint CRAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 396,
+#else
+ 8,
+#endif
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlpha */
+
+static const OnigCodePoint CRBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 3,
+#endif
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRBlank */
+
+static const OnigCodePoint CRCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 19,
+#else
+ 3,
+#endif
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRCntrl */
+
+static const OnigCodePoint CRDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 23,
+#else
+ 1,
+#endif
+ 0x0030, 0x0039
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRDigit */
+
+static const OnigCodePoint CRGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 2,
+#endif
+ 0x0021, 0x007e,
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRGraph */
+
+static const OnigCodePoint CRLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 424,
+#else
+ 6,
+#endif
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRLower */
+
+static const OnigCodePoint CRPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x007e,
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPrint */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_PUNCT.
+ * The first element is the number of (first, last) pairs that follow:
+ * 86 when USE_UNICODE_FULL_RANGE_CTYPE is defined, otherwise only the
+ * 14 pairs up to 0x00bf. A single code point is stored as a pair whose
+ * two ends are equal.
+ */
+static const OnigCodePoint CRPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 86,
+#else
+ 14,
+#endif
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPunct */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_SPACE.
+ * The first element is the number of (first, last) pairs that follow:
+ * 11 when USE_UNICODE_FULL_RANGE_CTYPE is defined, otherwise only the
+ * 4 pairs up to 0x00a0.
+ */
+static const OnigCodePoint CRSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 11,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRSpace */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_UPPER.
+ * The first element is the number of (first, last) pairs that follow:
+ * 421 when USE_UNICODE_FULL_RANGE_CTYPE is defined, otherwise only the
+ * 3 pairs up to 0x00de. A single code point is stored as a pair whose
+ * two ends are equal.
+ */
+static const OnigCodePoint CRUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 421,
+#else
+ 3,
+#endif
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRUpper */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_XDIGIT: a pair count followed by
+ * (first, last) pairs. The table is the same whether or not
+ * USE_UNICODE_FULL_RANGE_CTYPE is defined.
+ */
+static const OnigCodePoint CRXDigit[] = {
+  3,
+  0x0030, 0x0039,
+  0x0041, 0x0046,
+  0x0061, 0x0066
+}; /* end of CRXDigit */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_ASCII: a pair count followed by
+ * one (first, last) pair. The table is the same whether or not
+ * USE_UNICODE_FULL_RANGE_CTYPE is defined.
+ */
+static const OnigCodePoint CRASCII[] = {
+  1,
+  0x0000, 0x007f
+}; /* end of CRASCII */
+
+/*
+ * Code point ranges for ONIGENC_CTYPE_WORD.
+ * The first element is the number of (first, last) pairs that follow.
+ * Without USE_UNICODE_FULL_RANGE_CTYPE there are 12 pairs and the last
+ * one runs from 0x00f8 to 0x7fffffff; with it there are 436 pairs that
+ * list the ranges individually.
+ */
+static const OnigCodePoint CRWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 436,
+#else
+ 12,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* USE_UNICODE_FULL_RANGE_CTYPE */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRWord */
+
+
+/*
+ * Report whether code point `code` belongs to character type `ctype`.
+ *
+ * Code points below 256 are answered by
+ * ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. For larger code points:
+ *  - with USE_UNICODE_FULL_RANGE_CTYPE, the CR* range table matching
+ *    `ctype` is searched with onig_is_in_code_range(); XDIGIT, ASCII and
+ *    NEWLINE always give FALSE, and an unrecognised ctype gives
+ *    ONIGENCERR_TYPE_BUG;
+ *  - otherwise the answer is TRUE exactly when `ctype` has the
+ *    ONIGENC_CTYPE_WORD bit set.
+ */
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+  const OnigCodePoint* ranges;
+#endif
+
+  if (code < 256) {
+    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+  }
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+
+  /* Select the range table first, then do a single lookup below. */
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:  ranges = CRAlpha;  break;
+  case ONIGENC_CTYPE_BLANK:  ranges = CRBlank;  break;
+  case ONIGENC_CTYPE_CNTRL:  ranges = CRCntrl;  break;
+  case ONIGENC_CTYPE_DIGIT:  ranges = CRDigit;  break;
+  case ONIGENC_CTYPE_GRAPH:  ranges = CRGraph;  break;
+  case ONIGENC_CTYPE_LOWER:  ranges = CRLower;  break;
+  case ONIGENC_CTYPE_PRINT:  ranges = CRPrint;  break;
+  case ONIGENC_CTYPE_PUNCT:  ranges = CRPunct;  break;
+  case ONIGENC_CTYPE_SPACE:  ranges = CRSpace;  break;
+  case ONIGENC_CTYPE_UPPER:  ranges = CRUpper;  break;
+  case ONIGENC_CTYPE_WORD:   ranges = CRWord;   break;
+  case ONIGENC_CTYPE_ALNUM:  ranges = CRAlnum;  break;
+
+  /* These types have no member at or above code point 256 here. */
+  case ONIGENC_CTYPE_XDIGIT:
+  case ONIGENC_CTYPE_ASCII:
+  case ONIGENC_CTYPE_NEWLINE:
+    return FALSE;
+
+  default:
+    return ONIGENCERR_TYPE_BUG;
+  }
+
+  return onig_is_in_code_range((UChar* )ranges, code);
+
+#else
+
+  return ((ctype & ONIGENC_CTYPE_WORD) != 0) ? TRUE : FALSE;
+
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}
+
+/*
+ * Hand back the code range tables for character type `ctype`.
+ *
+ * *sbr is always set to an empty table (a single 0 count). On success
+ * *mbr is set to the CR* table for `ctype` and 0 is returned. For an
+ * unrecognised ctype, ONIGENCERR_TYPE_BUG is returned and *mbr is left
+ * untouched.
+ */
+extern int
+onigenc_unicode_get_ctype_code_range(int ctype,
+    const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+{
+  static const OnigCodePoint EmptyRange[] = { 0 };
+  const OnigCodePoint* table;
+
+  *sbr = EmptyRange;
+
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:   table = CRAlpha;   break;
+  case ONIGENC_CTYPE_BLANK:   table = CRBlank;   break;
+  case ONIGENC_CTYPE_CNTRL:   table = CRCntrl;   break;
+  case ONIGENC_CTYPE_DIGIT:   table = CRDigit;   break;
+  case ONIGENC_CTYPE_GRAPH:   table = CRGraph;   break;
+  case ONIGENC_CTYPE_LOWER:   table = CRLower;   break;
+  case ONIGENC_CTYPE_PRINT:   table = CRPrint;   break;
+  case ONIGENC_CTYPE_PUNCT:   table = CRPunct;   break;
+  case ONIGENC_CTYPE_SPACE:   table = CRSpace;   break;
+  case ONIGENC_CTYPE_UPPER:   table = CRUpper;   break;
+  case ONIGENC_CTYPE_XDIGIT:  table = CRXDigit;  break;
+  case ONIGENC_CTYPE_WORD:    table = CRWord;    break;
+  case ONIGENC_CTYPE_ASCII:   table = CRASCII;   break;
+  case ONIGENC_CTYPE_ALNUM:   table = CRAlnum;   break;
+
+  default:
+    return ONIGENCERR_TYPE_BUG;
+  }
+
+  *mbr = table;
+  return 0;
+}
diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c
new file mode 100644
index 0000000..6ab80a6
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_be.c
@@ -0,0 +1,232 @@
+/**********************************************************************
+ utf16_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Classify the first byte of a big-endian UTF-16 code unit:
+ * 0xd8-0xdb starts a leading surrogate, 0xdc-0xdf a trailing one.
+ * The argument is parenthesized so any expression can be passed; it is
+ * still evaluated twice, so it must have no side effects.
+ */
+#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
+
+/*
+ * Byte length of a UTF-16BE character, indexed by its first byte:
+ * 4 for 0xd8-0xdb (the bytes UTF16_IS_SURROGATE_FIRST accepts),
+ * 2 for every other value.
+ */
+static const int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/*
+ * Return the byte length (2 or 4) of the character starting at p,
+ * looked up in EncLen_UTF16 by its first byte.
+ */
+static int
+utf16be_mbc_enc_len(const UChar* p)
+{
+  const UChar lead = p[0];
+
+  return EncLen_UTF16[lead];
+}
+
+/*
+ * Return 1 if the two bytes at p form a newline code unit, else 0.
+ *
+ * At least two bytes must be available before `end`. U+000A always
+ * counts; with USE_UNICODE_ALL_LINE_TERMINATORS, U+000D, U+0085,
+ * U+2028 and U+2029 count as well.
+ */
+static int
+utf16be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  UChar hi, lo;
+
+  if (p + 1 >= end)
+    return 0;
+
+  hi = p[0];
+  lo = p[1];
+
+  if (hi == 0x00 && lo == 0x0a)
+    return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (hi == 0x00 && (lo == 0x0d || lo == 0x85))
+    return 1;
+  if (hi == 0x20 && (lo == 0x28 || lo == 0x29))
+    return 1;
+#endif
+
+  return 0;
+}
+
+/*
+ * Decode the UTF-16BE character at p into a code point.
+ *
+ * When the first byte is a leading-surrogate byte (0xd8-0xdb), four
+ * bytes are combined into a supplementary code point; otherwise the two
+ * bytes p[0], p[1] form the code point directly.
+ *
+ * A surrogate lead with fewer than four bytes left before `end` returns
+ * 0 instead of reading past the buffer.
+ * NOTE(review): the trailing unit is not checked to be a real low
+ * surrogate, and the two-byte path still assumes p[1] is readable.
+ */
+static OnigCodePoint
+utf16be_mbc_to_code(const UChar* p, const UChar* end)
+{
+  OnigCodePoint code;
+
+  if (UTF16_IS_SURROGATE_FIRST(*p)) {
+    /* Truncated surrogate pair: do not touch p[1..3]. */
+    if (end - p < 4) return 0;
+
+    code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+         + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+         + p[3];
+  }
+  else {
+    code = p[0] * 256 + p[1];
+  }
+  return code;
+}
+
+/*
+ * Return the number of bytes needed to encode `code`: 2 for code points
+ * up to 0xffff, 4 for anything above.
+ */
+static int
+utf16be_code_to_mbclen(OnigCodePoint code)
+{
+  if (code <= 0xffff)
+    return 2;
+
+  return 4;
+}
+
+/*
+ * Encode `code` as UTF-16BE into buf and return the byte count written
+ * (4 for code points above 0xffff, otherwise 2).
+ *
+ * This is the inverse of utf16be_mbc_to_code(): the surrogate pair
+ * carries (code - 0x10000), so the plane number is reduced by one
+ * before being packed, and the low surrogate's first byte takes the two
+ * lowest bits of the code point's second byte.
+ */
+static int
+utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  UChar* p = buf;
+
+  if (code > 0xffff) {
+    unsigned int plane, high;
+
+    /* plane - 1 == (code - 0x10000) >> 16 */
+    plane = (code >> 16) - 1;
+    *p++ = (UChar )((plane >> 2) + 0xd8);
+    high = (code & 0xff00) >> 8;
+    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
+    /* bits 8-9 of the code point are the top two bits of the low
+       surrogate's 10-bit payload */
+    *p++ = (UChar )((high & 0x03) + 0xdc);
+    *p   = (UChar )(code & 0xff);
+    return 4;
+  }
+  else {
+    *p++ = (UChar )((code & 0xff00) >> 8);
+    *p++ = (UChar )(code & 0xff);
+    return 2;
+  }
+}
+
+/*
+ * Copy the character at *pp into `lower`, lower-casing it when allowed,
+ * advance *pp past it and return the number of bytes written.
+ *
+ * Only characters whose first byte is 0 are candidates for folding: the
+ * second byte is passed through ONIGENC_ISO_8859_1_TO_LOWER_CASE when
+ * `flag` enables the matching class (ASCII or non-ASCII). Every other
+ * character is copied unchanged; the copy is skipped when `lower`
+ * already points at the source bytes. `end` is not consulted.
+ */
+static int
+utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+
+  if (*src != 0) {
+    /* High byte non-zero: nothing to fold, plain copy. */
+    int n = EncLen_UTF16[*src];
+
+    if (lower != src) {
+      int k;
+      for (k = 0; k < n; k++) {
+        lower[k] = src[k];
+      }
+    }
+    (*pp) += n;
+    return n; /* return byte length of converted char to lower */
+  }
+  else {
+    const UChar* low = src + 1;
+
+    lower[0] = '\0';
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(low)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(low))) {
+      lower[1] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*low);
+    }
+    else {
+      lower[1] = *low;
+    }
+
+    (*pp) += 2;
+    return 2; /* return byte length of converted char to lower */
+  }
+}
+
+/*
+ * Advance *pp past the character it points to and report whether that
+ * character is case-ambiguous under `flag`.
+ *
+ * Only characters whose first byte is 0 can be ambiguous. Such a
+ * character is ambiguous when `flag` enables its class (ASCII or
+ * non-ASCII) and it is an upper- or lower-case letter, except for the
+ * range 0xaa-0xba, whose lower-case letters cannot be converted.
+ *
+ * The letter test is `v != 0`. The earlier form,
+ * `(v | ONIGENC_CTYPE_LOWER) != 0`, held for every character, so
+ * digits and punctuation were reported as ambiguous too.
+ */
+static int
+utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += EncLen_UTF16[*p];
+
+  if (*p == 0) {
+    int c, v;
+
+    p++;
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+      if (v != 0) {
+        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+        if (c >= 0xaa && c <= 0xba)
+          return FALSE;
+        else
+          return TRUE;
+      }
+    }
+  }
+
+  return FALSE;
+}
+
+/*
+ * Move s back to the first byte of the character that contains it.
+ *
+ * If s is at or before `start` it is returned unchanged. Otherwise s is
+ * first aligned down to an even offset from `start`; if the byte there
+ * is a trailing-surrogate byte (0xdc-0xdf) and at least two bytes
+ * precede it, s steps back two more bytes onto the preceding code unit.
+ */
+static UChar*
+utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar* head = s;
+
+  if (head > start) {
+    if ((head - start) % 2 == 1) {
+      head--;
+    }
+
+    if (UTF16_IS_SURROGATE_SECOND(*head) && head > start + 1) {
+      head -= 2;
+    }
+  }
+
+  return (UChar* )head;
+}
+
+/*
+ * Encoding descriptor for UTF-16BE. It combines the utf16be_* routines
+ * defined in this file with shared onigenc_* routines for the pair and
+ * compound ambiguity tables, ctype lookups and reverse matching.
+ */
+OnigEncodingType OnigEncodingUTF16_BE = {
+ utf16be_mbc_enc_len,
+ "UTF-16BE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16be_is_mbc_newline,
+ utf16be_mbc_to_code,
+ utf16be_code_to_mbclen,
+ utf16be_code_to_mbc,
+ utf16be_mbc_to_normalize,
+ utf16be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c
new file mode 100644
index 0000000..2248e49
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_le.c
@@ -0,0 +1,230 @@
+/**********************************************************************
+ utf16_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Classify the high-order byte of a 16-bit code unit:
+ * 0xd8-0xdb begins a surrogate pair, 0xdc-0xdf ends one.
+ * The argument is parenthesized so compound expressions expand correctly;
+ * it is still evaluated twice, so pass side-effect-free expressions only. */
+#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
+
+/* Byte length of a UTF-16 character, looked up by one byte of its first
+ * code unit (this file indexes it with *(p+1), the high-order byte in
+ * little-endian order).  Entries 0xd8-0xdb are 4 (start of a surrogate
+ * pair); every other entry is 2. */
+static const int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/* Encoded size in bytes of a code point: 4 above 0xffff, otherwise 2. */
+static int
+utf16le_code_to_mbclen(OnigCodePoint code)
+{
+  if (code > 0xffff)
+    return 4;
+
+  return 2;
+}
+
+/* Byte length of the character starting at p, decided by the second byte
+ * (the high-order byte of the first little-endian code unit). */
+static int
+utf16le_mbc_enc_len(const UChar* p)
+{
+  const UChar high_byte = p[1];
+
+  return EncLen_UTF16[high_byte];
+}
+
+/* Return 1 when a line terminator starts at p, 0 otherwise.  At least two
+ * bytes must lie before end.  Only U+000A is accepted unless
+ * USE_UNICODE_ALL_LINE_TERMINATORS is defined. */
+static int
+utf16le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  UChar lo, hi;
+
+  if (p + 1 >= end)
+    return 0;
+
+  lo = p[0];
+  hi = p[1];
+
+  if (hi == 0x00 && lo == 0x0a)                     /* U+000A */
+    return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (hi == 0x00 && (lo == 0x0d || lo == 0x85))     /* U+000D, U+0085 */
+    return 1;
+  if (hi == 0x20 && (lo == 0x29 || lo == 0x28))     /* U+2029, U+2028 */
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Decode the UTF-16LE character at p into a code point.
+ * NOTE: end is not consulted; when the first unit is a leading surrogate
+ * the function reads p[2] and p[3] unconditionally. */
+static OnigCodePoint
+utf16le_mbc_to_code(const UChar* p, const UChar* end)
+{
+  const UChar lo = p[0];
+  const UChar hi = p[1];
+
+  /* single 16-bit unit */
+  if (!UTF16_IS_SURROGATE_FIRST(hi))
+    return (OnigCodePoint )(hi * 256 + lo);
+
+  /* surrogate pair: plane (+1), then middle byte, then low byte */
+  return (OnigCodePoint )(
+           ((((hi - 0xd8) << 2) + ((lo & 0xc0) >> 6) + 1) << 16)
+         + ((((lo & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
+         + p[2]);
+}
+
+/* Encode code as UTF-16LE into buf and return the number of bytes written
+ * (4 for codes above 0xffff, otherwise 2).
+ *
+ * Fixes relative to the previous version, which produced wrong pairs:
+ *  - the plane is reduced by one before it is packed into the leading
+ *    surrogate, mirroring the "+ 1" applied by utf16le_mbc_to_code;
+ *  - the trailing surrogate keeps both low bits of the middle byte
+ *    (mask 0x03, not 0x02).
+ * Example: 0x10100 now encodes as 00 d8 00 dd.
+ * NOTE: codes above 0x10ffff are not rejected here. */
+static int
+utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  UChar* p = buf;
+
+  if (code > 0xffff) {
+    unsigned int plane, high;
+
+    plane = (unsigned int )(code >> 16) - 1;     /* bits 19..16 of code - 0x10000 */
+    high  = (unsigned int )((code & 0xff00) >> 8);
+
+    /* leading surrogate, low byte first */
+    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
+    *p++ = (UChar )((plane >> 2) + 0xd8);
+    /* trailing surrogate, low byte first */
+    *p++ = (UChar )(code & 0xff);
+    *p   = (UChar )((high & 0x03) + 0xdc);
+    return 4;
+  }
+  else {
+    *p++ = (UChar )(code & 0xff);
+    *p++ = (UChar )((code & 0xff00) >> 8);
+    return 2;
+  }
+}
+
+/* Copy the character at *pp into lower, lower-casing it via the ISO-8859-1
+ * table when its high byte is zero and flag selects its class (ASCII or
+ * non-ASCII).  Advances *pp past the character and returns the number of
+ * bytes stored in lower. */
+static int
+utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  if (src[1] != 0) {
+    /* above 0x00ff: pass the bytes through untouched */
+    int len = EncLen_UTF16[src[1]];
+
+    if (lower != src) {
+      int i;
+      for (i = 0; i < len; i++)
+        lower[i] = src[i];
+    }
+    *pp += len;
+    return len; /* return byte length of converted char to lower */
+  }
+
+  lower[1] = '\0';
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  else
+    lower[0] = *src;
+
+  *pp += 2;
+  return 2; /* return byte length of converted char to lower */
+}
+
+/* Advance *pp past the character it points at and return TRUE when that
+ * character has a zero high byte, belongs to the class selected by flag
+ * (ASCII / non-ASCII), and is an upper- or lower-case letter according to
+ * the ISO-8859-1 ctype table; otherwise return FALSE.
+ *
+ * The previous test "(v | ONIGENC_CTYPE_LOWER) != 0" was true for every
+ * value, so digits, punctuation and control characters were reported as
+ * case-ambiguous and the final return was unreachable.  The test below
+ * only relies on v being zero / non-zero. */
+static int
+utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += EncLen_UTF16[*(p+1)];
+
+  if (*(p+1) == 0) {
+    int c, v;
+
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE;   /* neither upper nor lower case */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Move s back to the first byte of the UTF-16LE character that contains it.
+ * A pointer at or before start is returned unchanged. */
+static UChar*
+utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar* head = s;
+
+  if (head > start) {
+    /* an odd offset from start is the second byte of a 16-bit unit */
+    if ((head - start) % 2 == 1)
+      head -= 1;
+
+    /* the high-order byte is at head+1; on a second surrogate, back up
+       over the first one */
+    if (UTF16_IS_SURROGATE_SECOND(*(head+1)) && head > start + 1)
+      head -= 2;
+  }
+
+  return (UChar* )head;
+}
+
+/* Encoding descriptor for UTF-16LE.
+ * The initializer is positional, so every entry must stay in the member
+ * order of OnigEncodingType (declared outside this file -- confirm against
+ * that declaration before reordering).  The utf16le_* entries are the
+ * static functions defined above; the onigenc_* entries are shared helpers
+ * defined elsewhere. */
+OnigEncodingType OnigEncodingUTF16_LE = {
+ utf16le_mbc_enc_len,
+ "UTF-16LE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16le_is_mbc_newline,
+ utf16le_mbc_to_code,
+ utf16le_code_to_mbclen,
+ utf16le_code_to_mbc,
+ utf16le_mbc_to_normalize,
+ utf16le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c
new file mode 100644
index 0000000..75133ca
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_be.c
@@ -0,0 +1,187 @@
+/**********************************************************************
+ utf32_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Every UTF-32 character occupies four bytes; p is not inspected. */
+static int
+utf32be_mbc_enc_len(const UChar* p)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Return 1 when a line terminator starts at p, 0 otherwise.  At least four
+ * bytes must lie before end.  Only U+000A is accepted unless
+ * USE_UNICODE_ALL_LINE_TERMINATORS is defined. */
+static int
+utf32be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p + 3 >= end)
+    return 0;
+
+  /* every accepted terminator has its two leading bytes zero */
+  if (p[0] != 0 || p[1] != 0)
+    return 0;
+
+  if (p[2] == 0 && p[3] == 0x0a)                         /* U+000A */
+    return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (p[2] == 0 && (p[3] == 0x0d || p[3] == 0x85))       /* U+000D, U+0085 */
+    return 1;
+  if (p[2] == 0x20 && (p[3] == 0x29 || p[3] == 0x28))    /* U+2029, U+2028 */
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Assemble the four big-endian bytes at p into a code point; end is not
+ * consulted.
+ *
+ * Each byte is widened to OnigCodePoint before shifting.  The previous
+ * expression multiplied in int, which overflows (undefined behaviour) when
+ * p[0] >= 0x80.
+ * NOTE(review): assumes OnigCodePoint is an unsigned type of at least
+ * 32 bits -- its declaration is outside this file; confirm. */
+static OnigCodePoint
+utf32be_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return ((OnigCodePoint )p[0] << 24)
+       | ((OnigCodePoint )p[1] << 16)
+       | ((OnigCodePoint )p[2] <<  8)
+       |  (OnigCodePoint )p[3];
+}
+
+/* Every code point encodes to four bytes; code is not inspected. */
+static int
+utf32be_code_to_mbclen(OnigCodePoint code)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Store code into buf as four bytes, most significant first; returns 4. */
+static int
+utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )((code & 0xff000000) >> 24);
+  buf[1] = (UChar )((code & 0x00ff0000) >> 16);
+  buf[2] = (UChar )((code & 0x0000ff00) >>  8);
+  buf[3] = (UChar )( code & 0x000000ff);
+
+  return 4;
+}
+
+/* Copy the character at *pp into lower, lower-casing it via the ISO-8859-1
+ * table when its three leading bytes are zero and flag selects its class
+ * (ASCII or non-ASCII).  Advances *pp by four and returns 4. */
+static int
+utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  const UChar* last;
+  int fold;
+
+  if (src[2] != 0 || src[1] != 0 || src[0] != 0) {
+    /* above 0x000000ff: pass the bytes through untouched */
+    if (lower != src) {
+      int i;
+      for (i = 0; i < 4; i++)
+        lower[i] = src[i];
+    }
+    *pp += 4;
+    return 4; /* return byte length of converted char to lower */
+  }
+
+  last = src + 3;          /* the only byte carrying a value */
+  lower[0] = '\0';
+  lower[1] = '\0';
+  lower[2] = '\0';
+
+  if (ONIGENC_IS_MBC_ASCII(last))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    lower[3] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*last);
+  else
+    lower[3] = *last;
+
+  *pp += 4;
+  return 4; /* return byte length of converted char to lower */
+}
+
+/* Advance *pp by four bytes and return TRUE when the character it pointed
+ * at has its three leading bytes zero, belongs to the class selected by
+ * flag (ASCII / non-ASCII), and is an upper- or lower-case letter according
+ * to the ISO-8859-1 ctype table; otherwise return FALSE.
+ *
+ * The previous test "(v | ONIGENC_CTYPE_LOWER) != 0" was true for every
+ * value, so digits, punctuation and control characters were reported as
+ * case-ambiguous and the final return was unreachable.  The test below
+ * only relies on v being zero / non-zero. */
+static int
+utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += 4;
+
+  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
+    int c, v;
+
+    p += 3;   /* the last byte is the only one carrying a value */
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE;   /* neither upper nor lower case */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Round s down to a 4-byte boundary measured from start.  A pointer at or
+ * before start is returned unchanged. */
+static UChar*
+utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  if (s > start) {
+    int excess = (s - start) % 4;
+    s -= excess;
+  }
+
+  return (UChar* )s;
+}
+
+/* Encoding descriptor for UTF-32BE.
+ * The initializer is positional, so every entry must stay in the member
+ * order of OnigEncodingType (declared outside this file -- confirm against
+ * that declaration before reordering).  The utf32be_* entries are the
+ * static functions defined above; the onigenc_* entries are shared helpers
+ * defined elsewhere. */
+OnigEncodingType OnigEncodingUTF32_BE = {
+ utf32be_mbc_enc_len,
+ "UTF-32BE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32be_is_mbc_newline,
+ utf32be_mbc_to_code,
+ utf32be_code_to_mbclen,
+ utf32be_code_to_mbc,
+ utf32be_mbc_to_normalize,
+ utf32be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c
new file mode 100644
index 0000000..21dca10
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_le.c
@@ -0,0 +1,185 @@
+/**********************************************************************
+ utf32_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Every UTF-32 character occupies four bytes; p is not inspected. */
+static int
+utf32le_mbc_enc_len(const UChar* p)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Return 1 when a line terminator starts at p, 0 otherwise.  At least four
+ * bytes must lie before end.  Only U+000A is accepted unless
+ * USE_UNICODE_ALL_LINE_TERMINATORS is defined, which adds U+000D, U+0085,
+ * U+2028 and U+2029.
+ *
+ * Fix: the U+000D / U+0085 test compared the pointer (p+2) with 0 instead
+ * of the byte *(p+2), so it could never succeed. */
+static int
+utf32le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p + 3 < end) {
+    if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
+      return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00
+        && *(p+2) == 0x00 && *(p+3) == 0x00)
+      return 1;
+    if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
+        && *(p+2) == 0x00 && *(p+3) == 0x00)
+      return 1;
+#endif
+  }
+  return 0;
+}
+
+/* Assemble the four little-endian bytes at p into a code point; end is not
+ * consulted.
+ *
+ * Each byte is widened to OnigCodePoint before shifting.  The previous
+ * expression multiplied in int, which overflows (undefined behaviour) when
+ * p[3] >= 0x80.
+ * NOTE(review): assumes OnigCodePoint is an unsigned type of at least
+ * 32 bits -- its declaration is outside this file; confirm. */
+static OnigCodePoint
+utf32le_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return ((OnigCodePoint )p[3] << 24)
+       | ((OnigCodePoint )p[2] << 16)
+       | ((OnigCodePoint )p[1] <<  8)
+       |  (OnigCodePoint )p[0];
+}
+
+/* Every code point encodes to four bytes; code is not inspected. */
+static int
+utf32le_code_to_mbclen(OnigCodePoint code)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Store code into buf as four bytes, least significant first; returns 4. */
+static int
+utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )( code & 0x000000ff);
+  buf[1] = (UChar )((code & 0x0000ff00) >>  8);
+  buf[2] = (UChar )((code & 0x00ff0000) >> 16);
+  buf[3] = (UChar )((code & 0xff000000) >> 24);
+
+  return 4;
+}
+
+/* Copy the character at *pp into lower, lower-casing it via the ISO-8859-1
+ * table when its three trailing bytes are zero and flag selects its class
+ * (ASCII or non-ASCII).  Advances *pp by four and returns 4. */
+static int
+utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  if (src[1] != 0 || src[2] != 0 || src[3] != 0) {
+    /* above 0x000000ff: pass the bytes through untouched */
+    if (lower != src) {
+      int i;
+      for (i = 0; i < 4; i++)
+        lower[i] = src[i];
+    }
+    *pp += 4;
+    return 4; /* return byte length of converted char to lower */
+  }
+
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  /* the first byte is the only one carrying a value */
+  if (fold)
+    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  else
+    lower[0] = *src;
+  lower[1] = '\0';
+  lower[2] = '\0';
+  lower[3] = '\0';
+
+  *pp += 4;
+  return 4; /* return byte length of converted char to lower */
+}
+
+/* Advance *pp by four bytes and return TRUE when the character it pointed
+ * at has its three trailing bytes zero, belongs to the class selected by
+ * flag (ASCII / non-ASCII), and is an upper- or lower-case letter according
+ * to the ISO-8859-1 ctype table; otherwise return FALSE.
+ *
+ * The previous test "(v | ONIGENC_CTYPE_LOWER) != 0" was true for every
+ * value, so digits, punctuation and control characters were reported as
+ * case-ambiguous and the final return was unreachable.  The test below
+ * only relies on v being zero / non-zero. */
+static int
+utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += 4;
+
+  if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
+    int c, v;
+
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE;   /* neither upper nor lower case */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Round s down to a 4-byte boundary measured from start.  A pointer at or
+ * before start is returned unchanged. */
+static UChar*
+utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  if (s > start) {
+    int excess = (s - start) % 4;
+    s -= excess;
+  }
+
+  return (UChar* )s;
+}
+
+/* Encoding descriptor for UTF-32LE.
+ * The initializer is positional, so every entry must stay in the member
+ * order of OnigEncodingType (declared outside this file -- confirm against
+ * that declaration before reordering).  The utf32le_* entries are the
+ * static functions defined above; the onigenc_* entries are shared helpers
+ * defined elsewhere. */
+OnigEncodingType OnigEncodingUTF32_LE = {
+ utf32le_mbc_enc_len,
+ "UTF-32LE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32le_is_mbc_newline,
+ utf32le_mbc_to_code,
+ utf32le_code_to_mbclen,
+ utf32le_code_to_mbc,
+ utf32le_mbc_to_normalize,
+ utf32le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c
new file mode 100644
index 0000000..c7481d7
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf8.c
@@ -0,0 +1,3730 @@
+/**********************************************************************
+ utf8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* When defined, the bytes 0xfe and 0xff (never valid in UTF-8) are mapped
+ * to the dedicated INVALID_CODE_* values below instead of to the code
+ * points 0xfe / 0xff. */
+#define USE_INVALID_CODE_SCHEME
+
+#ifdef USE_INVALID_CODE_SCHEME
+/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
+#define INVALID_CODE_FE 0xfffffffe
+#define INVALID_CODE_FF 0xffffffff
+#define VALID_CODE_LIMIT 0x7fffffff
+#endif
+
+/* True when c is not a continuation byte (bit pattern 10xxxxxx). */
+#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
+
+/* Sequence length in bytes, indexed by the first byte of the sequence:
+ *   0x00-0xbf -> 1   (continuation bytes 0x80-0xbf also count as 1)
+ *   0xc0-0xdf -> 2,  0xe0-0xef -> 3,  0xf0-0xf7 -> 4
+ *   0xf8-0xfb -> 5,  0xfc-0xfd -> 6
+ *   0xfe-0xff -> 1   (invalid bytes, treated as single-byte) */
+static const int EncLen_UTF8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+};
+
+/* Byte length of the sequence whose first byte is *p. */
+static int
+utf8_mbc_enc_len(const UChar* p)
+{
+  const UChar first = *p;
+
+  return EncLen_UTF8[first];
+}
+
+/* Return 1 when a line terminator starts at p (and p < end), 0 otherwise.
+ * Only 0x0a is accepted unless USE_UNICODE_ALL_LINE_TERMINATORS is defined. */
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p >= end)
+    return 0;
+
+  if (*p == 0x0a)
+    return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (*p == 0x0d)
+    return 1;
+
+  /* two-byte form: U+0085 */
+  if (p + 1 < end && p[1] == 0x85 && p[0] == 0xc2)
+    return 1;
+
+  /* three-byte forms: U+2028, U+2029 */
+  if (p + 2 < end
+      && (p[2] == 0xa8 || p[2] == 0xa9)
+      && p[1] == 0x80 && p[0] == 0xe2)
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Decode the sequence at p into a code point.  The length comes from
+ * enc_len(); end is not consulted and continuation bytes are not validated.
+ * With USE_INVALID_CODE_SCHEME the lone bytes 0xfe / 0xff decode to
+ * INVALID_CODE_FE / INVALID_CODE_FF. */
+static OnigCodePoint
+utf8_mbc_to_code(const UChar* p, const UChar* end)
+{
+  int byte, trail;
+  OnigCodePoint acc;
+
+  trail = enc_len(ONIG_ENCODING_UTF8, p) - 1;   /* bytes after the first */
+  byte = *p++;
+
+  if (trail < 1) {
+#ifdef USE_INVALID_CODE_SCHEME
+    if (byte > 0xfd) {
+      return ((byte == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
+    }
+#endif
+    return (OnigCodePoint )byte;
+  }
+
+  /* payload bits of the first byte, then six bits per following byte */
+  acc = byte & ((1 << (6 - trail)) - 1);
+  for (; trail > 0; trail--) {
+    byte = *p++;
+    acc = (acc << 6) | (byte & ((1 << 6) - 1));
+  }
+  return acc;
+}
+
+/* Number of bytes utf8_code_to_mbc() needs for code, or
+ * ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE when code cannot be encoded.
+ *
+ * Fix: with USE_INVALID_CODE_SCHEME the raw bytes 0xfe / 0xff are carried
+ * by INVALID_CODE_FE / INVALID_CODE_FF, so the code points 0xfe and 0xff
+ * are ordinary two-byte characters.  The old special case returned 1 for
+ * them although utf8_code_to_mbc() writes two bytes.  The special case is
+ * kept only for builds without the scheme, where utf8_mbc_to_code() maps
+ * those raw bytes to the codes 0xfe / 0xff. */
+static int
+utf8_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xffffff80) == 0) return 1;
+  else if ((code & 0xfffff800) == 0) {
+#ifndef USE_INVALID_CODE_SCHEME
+    if (code <= 0xff && code >= 0xfe)
+      return 1;
+#endif
+    return 2;
+  }
+  else if ((code & 0xffff0000) == 0) return 3;
+  else if ((code & 0xffe00000) == 0) return 4;
+  else if ((code & 0xfc000000) == 0) return 5;
+  else if ((code & 0x80000000) == 0) return 6;
+#ifdef USE_INVALID_CODE_SCHEME
+  else if (code == INVALID_CODE_FE) return 1;
+  else if (code == INVALID_CODE_FF) return 1;
+#endif
+  else
+    return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+}
+
+/* Compiled out.  Computes only the first byte of the UTF-8 encoding of
+ * code, or ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set. */
+#if 0
+static int
+utf8_code_to_mbc_first(OnigCodePoint code)
+{
+ if ((code & 0xffffff80) == 0)
+ return code;
+ else {
+ if ((code & 0xfffff800) == 0)
+ return ((code>>6)& 0x1f) | 0xc0;
+ else if ((code & 0xffff0000) == 0)
+ return ((code>>12) & 0x0f) | 0xe0;
+ else if ((code & 0xffe00000) == 0)
+ return ((code>>18) & 0x07) | 0xf0;
+ else if ((code & 0xfc000000) == 0)
+ return ((code>>24) & 0x03) | 0xf8;
+ else if ((code & 0x80000000) == 0)
+ return ((code>>30) & 0x01) | 0xfc;
+ else {
+ return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+ }
+ }
+}
+#endif
+
+/* Encode code as UTF-8 into buf and return the number of bytes written
+ * (1-6), or ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE when code cannot be encoded.
+ * With USE_INVALID_CODE_SCHEME, INVALID_CODE_FE / INVALID_CODE_FF are
+ * written back as the single bytes 0xfe / 0xff. */
+static int
+utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
+#define UTF8_TRAIL0(code)        (UChar )(((code) & 0x3f) | 0x80)
+
+  int trail;    /* number of continuation bytes after the lead byte */
+  int i;
+  UChar lead;
+
+  if ((code & 0xffffff80) == 0) {
+    *buf = (UChar )code;
+    return 1;
+  }
+
+  if ((code & 0xfffff800) == 0) {
+    lead = (UChar )(((code>>6)& 0x1f) | 0xc0);
+    trail = 1;
+  }
+  else if ((code & 0xffff0000) == 0) {
+    lead = (UChar )(((code>>12) & 0x0f) | 0xe0);
+    trail = 2;
+  }
+  else if ((code & 0xffe00000) == 0) {
+    lead = (UChar )(((code>>18) & 0x07) | 0xf0);
+    trail = 3;
+  }
+  else if ((code & 0xfc000000) == 0) {
+    lead = (UChar )(((code>>24) & 0x03) | 0xf8);
+    trail = 4;
+  }
+  else if ((code & 0x80000000) == 0) {
+    lead = (UChar )(((code>>30) & 0x01) | 0xfc);
+    trail = 5;
+  }
+#ifdef USE_INVALID_CODE_SCHEME
+  else if (code == INVALID_CODE_FE) {
+    *buf = 0xfe;
+    return 1;
+  }
+  else if (code == INVALID_CODE_FF) {
+    *buf = 0xff;
+    return 1;
+  }
+#endif
+  else {
+    return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+  }
+
+  /* lead byte, then six payload bits per continuation byte, high to low */
+  buf[0] = lead;
+  for (i = 1; i <= trail; i++) {
+    buf[i] = UTF8_TRAILS(code, 6 * (trail - i));
+  }
+  return trail + 1;
+}
+
+/* Copy the character at *pp into lower, lower-casing it when flag allows:
+ * ASCII via ONIGENC_ASCII_CODE_TO_LOWER_CASE, and the two-byte sequences
+ * 0xc3 0x80 .. 0xc3 0x9e (except 0xc3 0x97) by adding 32 to the second byte.
+ * Everything else is copied unchanged.  Advances *pp past the character
+ * and returns the number of bytes stored in lower. */
+static int
+utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int len, i;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+    else
+      *lower = *src;
+    *pp += 1;
+    return 1; /* return byte length of converted char to lower */
+  }
+
+  if (*src == 0xc3) {   /* 195 == '\303' */
+    int second = src[1];
+
+    if (second >= 0x80 && second <= 0x9e     /* upper: '\200' .. '\236' */
+        && (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0
+        && second != 0x97) {                 /* '\227' is left alone */
+      lower[0] = *src;
+      lower[1] = (UChar )(second + 32);
+      *pp += 2;
+      return 2;
+    }
+  }
+
+  len = enc_len(ONIG_ENCODING_UTF8, src);
+  if (lower != src) {
+    for (i = 0; i < len; i++)
+      lower[i] = src[i];
+  }
+  *pp += len;
+  return len; /* return byte length of converted char to lower */
+}
+
+/* Advance *pp past the character it points at and report whether that
+ * character is case-ambiguous under flag: ASCII is decided by
+ * ONIGENC_IS_ASCII_CODE_CASE_AMBIG; for the two-byte sequences starting
+ * with 0xc3 the second byte must lie in 0x80-0x9e (except 0x97) or
+ * 0xa0-0xbe (except 0xb7).  Everything else yields FALSE. */
+static int
+utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* src = *pp;
+  int second;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    *pp += 1;
+    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+      return FALSE;
+    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*src);
+  }
+
+  *pp += enc_len(ONIG_ENCODING_UTF8, src);
+
+  if (*src != 0xc3)   /* 195 == '\303' */
+    return FALSE;
+
+  second = src[1];
+  if (second < 0x80)
+    return FALSE;
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) == 0)
+    return FALSE;
+
+  if (second <= 0x9e)                          /* upper: up to '\236' */
+    return (second == 0x97) ? FALSE : TRUE;    /* '\227' excluded */
+
+  if (second >= 0xa0 && second <= 0xbe)        /* lower: '\240' .. '\276' */
+    return (second == 0xb7) ? FALSE : TRUE;    /* '\267' excluded */
+
+  return FALSE;
+}
+
+
+/* Range tables: the first element is the number of ranges, followed by
+ * that many inclusive (first, last) code point pairs. */
+
+/* A table holding zero ranges. */
+static const OnigCodePoint EmptyRange[] = { 0 };
+
+/* Alphanumeric ranges below 0x80: '0'-'9', 'A'-'Z', 'a'-'z'.
+ * ("SB" presumably stands for single-byte -- confirm against the users of
+ * this table.) */
+static const OnigCodePoint SBAlnum[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+/* Alphanumeric ranges at and above 0x00aa, in the same layout as the other
+ * range tables: a count followed by inclusive (first, last) pairs.
+ * Without USE_UNICODE_FULL_RANGE_CTYPE only the first 6 pairs (up to
+ * 0x0236) are compiled in; with it the table holds 6 + 405 = 411 pairs.
+ * The leading count must be kept in step with the number of pairs.
+ * ("MB" presumably stands for multi-byte -- confirm against the users of
+ * this table.) */
+static const OnigCodePoint MBAlnum[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 411,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlnum */
+
+static const OnigCodePoint SBAlpha[] = { /* alphabetic table; every entry here is below 0x80 */
+ 2, /* number of (from, to) pairs that follow */
+ 0x0041, 0x005a, /* ASCII 'A'..'Z'; NOTE(review): bounds presumably inclusive - confirm in the reader */
+ 0x0061, 0x007a /* ASCII 'a'..'z' */
+};
+
+static const OnigCodePoint MBAlpha[] = { /* alphabetic table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 394, /* pair count when the extended section below is compiled in (6 + 388) */
+#else
+ 6, /* pair count when only the six pairs directly below are compiled */
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236 /* last pair present in both builds; no trailing comma on purpose */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the common pairs above and the extended pairs below */
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlpha */
+
+static const OnigCodePoint SBBlank[] = { /* blank table; every entry here is below 0x80 */
+ 2, /* number of (from, to) pairs that follow */
+ 0x0009, 0x0009, /* horizontal tab only (from == to) */
+ 0x0020, 0x0020 /* space only */
+};
+
+static const OnigCodePoint MBBlank[] = { /* blank table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 7, /* pair count when the extended section below is compiled in (1 + 6) */
+#else
+ 1, /* pair count when only the single pair directly below is compiled */
+#endif
+ 0x00a0, 0x00a0 /* U+00A0 NO-BREAK SPACE; present in both builds, no trailing comma on purpose */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the common pair above and the extended pairs below */
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBBlank */
+
+static const OnigCodePoint SBCntrl[] = { /* control table; every entry here is below 0x80 */
+ 2, /* number of (from, to) pairs that follow */
+ 0x0000, 0x001f, /* ASCII C0 controls NUL..US */
+ 0x007f, 0x007f /* ASCII DEL */
+};
+
+static const OnigCodePoint MBCntrl[] = { /* control table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 18, /* pair count when the extended section below is compiled in (2 + 16) */
+#else
+ 2, /* pair count when only the two pairs directly below are compiled */
+#endif
+ 0x0080, 0x009f,
+ 0x00ad, 0x00ad /* last pair present in both builds; no trailing comma on purpose */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the common pairs above and the extended pairs below */
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff, /* one span covering the surrogate block and the BMP private-use area */
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd, /* plane 15 private use */
+ 0x100000, 0x10fffd /* plane 16 private use */
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBCntrl */
+
+static const OnigCodePoint SBDigit[] = { /* digit table; the only entry is below 0x80 */
+ 1, /* number of (from, to) pairs that follow */
+ 0x0030, 0x0039 /* ASCII '0'..'9' */
+};
+
+static const OnigCodePoint MBDigit[] = { /* digit table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 22, /* pair count when the section below is compiled in */
+#else
+ 0 /* no pairs in this build: the array then holds only this zero count, hence no comma */
+#endif
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBDigit */
+
+static const OnigCodePoint SBGraph[] = { /* graph table; the only entry is below 0x80 */
+ 1, /* number of (from, to) pairs that follow */
+ 0x0021, 0x007e /* ASCII '!'..'~' (space 0x20 is not covered) */
+};
+
+static const OnigCodePoint MBGraph[] = { /* graph table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 404, /* pair count when the extended section below is compiled in (1 + 403) */
+#else
+ 1, /* pair count when only the single pair directly below is compiled */
+#endif
+ 0x00a1, 0x0236 /* only pair present in both builds; no trailing comma on purpose */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the common pair above and the extended pairs below */
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBGraph */
+
+static const OnigCodePoint SBLower[] = { /* lowercase table; the only entry is below 0x80 */
+ 1, /* number of (from, to) pairs that follow */
+ 0x0061, 0x007a /* ASCII 'a'..'z' */
+};
+
+static const OnigCodePoint MBLower[] = { /* lowercase table; every entry here is 0x80 or above */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 423, /* pair count when the extended section below is compiled in (5 + 418) */
+#else
+ 5, /* pair count when only the five pairs directly below are compiled */
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff /* last pair present in both builds; no trailing comma on purpose */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the common pairs above and the extended pairs below */
+ 0x0101, 0x0101, /* many single-code-point pairs follow (from == to), mostly every other code point */
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBLower */
+
+static const OnigCodePoint SBPrint[] = { /* print table; every entry here is below 0x80 */
+ 2, /* number of (from, to) pairs that follow */
+ 0x0009, 0x000d, /* TAB, LF, VT, FF, CR - NOTE(review): C isprint() excludes these; confirm this is intended */
+ 0x0020, 0x007e /* ASCII space..'~' */
+};
+
+static const OnigCodePoint MBPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 403,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPrint */
+
+/*
+ * Punctuation ranges below 0x80.
+ * Layout (shared by all SB and MB tables here): the first element is the
+ * number of ranges; each following pair is a (first, last) code point range.
+ * Both ends are inclusive: single code points are written as (x, x).
+ * Only '!'..'#', '%'..'*', ','..'/', ':', ';', '?', '@', '['..']', '_', '{'
+ * and '}' are listed.  ASCII symbols such as '$', '+', '<', '=', '>', '^',
+ * '`', '|' and '~' are absent, so this set is narrower than C's ispunct().
+ */
+static const OnigCodePoint SBPunct[] = {
+ 9,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d
+}; /* end of SBPunct */
+
+/*
+ * Punctuation ranges from 0x00a1 upward.
+ * The leading count is 77 when USE_UNICODE_FULL_RANGE_CTYPE is defined (all
+ * ranges below are compiled in) and 5 otherwise (only the first five ranges,
+ * 0x00a1..0x00bf, are compiled in).  The count must be kept in sync with the
+ * number of (first, last) pairs in each configuration.
+ */
+static const OnigCodePoint MBPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 77,
+#else
+ 5,
+#endif
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPunct */
+
+/*
+ * White-space ranges below 0x80: 0x09..0x0d (TAB, LF, VT, FF, CR) and 0x20
+ * (SPACE).  First element is the range count, then (first, last) pairs.
+ */
+static const OnigCodePoint SBSpace[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020
+};
+
+/*
+ * White-space ranges from 0x0085 upward.
+ * The leading count is 9 when USE_UNICODE_FULL_RANGE_CTYPE is defined and 2
+ * otherwise (only 0x0085 and 0x00a0 are compiled in).
+ */
+static const OnigCodePoint MBSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBSpace */
+
+/* Upper-case range below 0x80: one range, 'A' (0x41) .. 'Z' (0x5a). */
+static const OnigCodePoint SBUpper[] = {
+ 1,
+ 0x0041, 0x005a
+};
+
+/*
+ * Upper-case ranges from 0x00c0 upward.
+ * The leading count is 420 when USE_UNICODE_FULL_RANGE_CTYPE is defined and
+ * 2 otherwise (only 0x00c0..0x00d6 and 0x00d8..0x00de are compiled in).
+ * The count must match the number of (first, last) pairs that follow.
+ */
+static const OnigCodePoint MBUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 420,
+#else
+ 2,
+#endif
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBUpper */
+
+/*
+ * Hexadecimal digit ranges: '0'..'9', 'A'..'F', 'a'..'f'.
+ * First element is the range count, then (first, last) pairs.
+ */
+static const OnigCodePoint SBXDigit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+};
+
+/* The whole 7-bit range 0x00..0x7f as a single (first, last) pair. */
+static const OnigCodePoint SBASCII[] = {
+ 1,
+ 0x0000, 0x007f
+};
+
+/*
+ * Word-character ranges below 0x80: '0'..'9', 'A'..'Z', '_' and 'a'..'z'.
+ * First element is the range count, then (first, last) pairs.
+ */
+static const OnigCodePoint SBWord[] = {
+ 4,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a
+};
+
+/*
+ * Word-character ranges from 0x00aa upward.
+ * The leading count is 432 when USE_UNICODE_FULL_RANGE_CTYPE is defined and
+ * 8 otherwise.  The first seven ranges are shared by both configurations.
+ * Without USE_UNICODE_FULL_RANGE_CTYPE the eighth and last range is
+ * 0x00f8..0x7fffffff, i.e. every code point from 0x00f8 up is listed as a
+ * word character.  With it, the detailed ranges below are used instead.
+ */
+static const OnigCodePoint MBWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 432,
+#else
+ 8,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* USE_UNICODE_FULL_RANGE_CTYPE is defined */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBWord */
+
+
+/*
+ * Select the pair of code range tables that describe a character type.
+ *
+ * ctype: one of the ONIGENC_CTYPE_* values handled in the switch below.
+ * sbr:   out; receives the SB* table for the type.
+ * mbr:   out; receives the MB* table for the type, or EmptyRange for the
+ *        types that have no MB* table here (XDIGIT and ASCII).
+ *
+ * Returns 0 on success.  For any other ctype, returns ONIGENCERR_TYPE_BUG
+ * and leaves *sbr and *mbr untouched.
+ */
+static int
+utf8_get_ctype_code_range(int ctype,
+                          const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+{
+  const OnigCodePoint *single;
+  const OnigCodePoint *multi;
+
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:
+    single = SBAlpha;   multi = MBAlpha;
+    break;
+  case ONIGENC_CTYPE_BLANK:
+    single = SBBlank;   multi = MBBlank;
+    break;
+  case ONIGENC_CTYPE_CNTRL:
+    single = SBCntrl;   multi = MBCntrl;
+    break;
+  case ONIGENC_CTYPE_DIGIT:
+    single = SBDigit;   multi = MBDigit;
+    break;
+  case ONIGENC_CTYPE_GRAPH:
+    single = SBGraph;   multi = MBGraph;
+    break;
+  case ONIGENC_CTYPE_LOWER:
+    single = SBLower;   multi = MBLower;
+    break;
+  case ONIGENC_CTYPE_PRINT:
+    single = SBPrint;   multi = MBPrint;
+    break;
+  case ONIGENC_CTYPE_PUNCT:
+    single = SBPunct;   multi = MBPunct;
+    break;
+  case ONIGENC_CTYPE_SPACE:
+    single = SBSpace;   multi = MBSpace;
+    break;
+  case ONIGENC_CTYPE_UPPER:
+    single = SBUpper;   multi = MBUpper;
+    break;
+  case ONIGENC_CTYPE_XDIGIT:
+    single = SBXDigit;  multi = EmptyRange;
+    break;
+  case ONIGENC_CTYPE_WORD:
+    single = SBWord;    multi = MBWord;
+    break;
+  case ONIGENC_CTYPE_ASCII:
+    single = SBASCII;   multi = EmptyRange;
+    break;
+  case ONIGENC_CTYPE_ALNUM:
+    single = SBAlnum;   multi = MBAlnum;
+    break;
+
+  default:
+    /* Unknown type: report it without touching the output pointers. */
+    return ONIGENCERR_TYPE_BUG;
+  }
+
+  *sbr = single;
+  *mbr = multi;
+  return 0;
+}
+
+/*
+ * Test whether a code point belongs to a character type.
+ *
+ * Code points below 256 are delegated to
+ * ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE().  For larger code points:
+ *  - with USE_UNICODE_FULL_RANGE_CTYPE, the matching MB* table is searched
+ *    with onig_is_in_code_range(); XDIGIT, ASCII and NEWLINE are always
+ *    FALSE, and any other unhandled ctype yields ONIGENCERR_TYPE_BUG;
+ *  - without it, the result is TRUE only when the ONIGENC_CTYPE_WORD bit is
+ *    set in ctype (and, under USE_INVALID_CODE_SCHEME, only when
+ *    code <= VALID_CODE_LIMIT), FALSE otherwise.
+ *
+ * NOTE(review): ONIGENCERR_TYPE_BUG is returned through the same int as the
+ * TRUE/FALSE answer; a caller that only tests for non-zero would presumably
+ * read it as "true" -- confirm against the callers.
+ */
+static int
+utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 256)
+    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+  {
+    const OnigCodePoint *table;
+
+    switch (ctype) {
+    case ONIGENC_CTYPE_ALPHA:  table = MBAlpha;  break;
+    case ONIGENC_CTYPE_BLANK:  table = MBBlank;  break;
+    case ONIGENC_CTYPE_CNTRL:  table = MBCntrl;  break;
+    case ONIGENC_CTYPE_DIGIT:  table = MBDigit;  break;
+    case ONIGENC_CTYPE_GRAPH:  table = MBGraph;  break;
+    case ONIGENC_CTYPE_LOWER:  table = MBLower;  break;
+    case ONIGENC_CTYPE_PRINT:  table = MBPrint;  break;
+    case ONIGENC_CTYPE_PUNCT:  table = MBPunct;  break;
+    case ONIGENC_CTYPE_SPACE:  table = MBSpace;  break;
+    case ONIGENC_CTYPE_UPPER:  table = MBUpper;  break;
+    case ONIGENC_CTYPE_WORD:   table = MBWord;   break;
+    case ONIGENC_CTYPE_ALNUM:  table = MBAlnum;  break;
+
+    /* These types never match a code point of 256 or above. */
+    case ONIGENC_CTYPE_XDIGIT:
+    case ONIGENC_CTYPE_ASCII:
+    case ONIGENC_CTYPE_NEWLINE:
+      return FALSE;
+
+    default:
+      return ONIGENCERR_TYPE_BUG;
+    }
+
+    return onig_is_in_code_range((UChar* )table, code);
+  }
+#else
+  if ((ctype & ONIGENC_CTYPE_WORD) == 0)
+    return FALSE;
+#ifdef USE_INVALID_CODE_SCHEME
+  if (code > VALID_CODE_LIMIT)
+    return FALSE;
+#endif
+  return TRUE;
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}
+
+/*
+ * Move back from s to the nearest byte for which utf8_islead() holds,
+ * without ever going before start.
+ *
+ * start: lower bound of the scan.
+ * s:     position to adjust.
+ *
+ * Returns s unchanged when s <= start; otherwise the first position at or
+ * before s that satisfies utf8_islead(), or start if none is found.
+ */
+static UChar*
+utf8_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar *cur = s;
+
+  if (cur > start) {
+    while (!utf8_islead(*cur) && cur > start)
+      cur--;
+  }
+
+  return (UChar* )cur;
+}
+
+/*
+ * UTF-8 encoding descriptor.
+ * Exposes this file's utf8_* callbacks, plus a few shared onigenc_* helpers,
+ * through the generic OnigEncodingType structure.  The initializer is
+ * positional, so the entries must stay in the order of the OnigEncodingType
+ * declaration (not shown in this file).
+ */
+OnigEncodingType OnigEncodingUTF8 = {
+ utf8_mbc_enc_len,
+ "UTF-8", /* name */
+ 6, /* max byte length */
+ 1, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf8_is_mbc_newline,
+ utf8_mbc_to_code,
+ utf8_code_to_mbclen,
+ utf8_code_to_mbc,
+ utf8_mbc_to_normalize,
+ utf8_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ utf8_is_code_ctype,
+ utf8_get_ctype_code_range,
+ utf8_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html
new file mode 100755
index 0000000..d55f1cc
--- /dev/null
+++ b/ext/mbstring/oniguruma/index.html
@@ -0,0 +1,187 @@
+<html>
+<head>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
+ <title>Oniguruma</title>
+</head>
+<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
+
+<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
+
+<p>
+(c) K.Kosako, updated at: 2007/08/16
+</p>
+
+<dl>
+<font color="orange">
+<dt><b>What's new</b>
+</font>
+<ul>
+<li>2007/08/16: Version 4.7.1 released.</li>
+<li>2007/07/14: Version 5.9.0 released.</li>
+<li>2007/06/20: Version 2.5.9 released.</li>
+<li>2007/06/20: Maintainer of 2.x was changed.</li>
+</ul>
+</dl>
+<hr>
+
+<p>
+Oniguruma is a regular expressions library.<br>
+The characteristic of this library is that a different character encoding
+<br>can be specified for every regular expression object.
+<br>(supported APIs: GNU regex, POSIX and Oniguruma native)
+</p>
+
+<dl>
+<dt><b>Supported character encodings:</b><br>
+ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
+EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
+ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
+ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
+<font color="orange">
+(GB18030 encoding was contributed by KUBO Takehiro)<br>
+(CP1251 encoding was contributed by Byte)
+</font>
+</p>
+</dl>
+
+<hr>
+
+<dt><b>License:</b> BSD license.
+
+<dl>
+<dt><b>Platform:</b>
+<ul>
+<li> Unix (including Mac OS X)
+<li> Cygwin
+<li> Win32
+</ul>
+
+<br>
+
+<dt><b>Download:</b>
+<ul>
+<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14) <a href="HISTORY_5X.txt">Change Log</a>
+<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04)
+<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27)
+<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a>
+<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
+<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a>
+</ul>
+
+<br>
+<font color="red">
+Maintainer of 2.x was changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br>
+About 2.x, please contact him.<br>
+</font>
+* 5.x supports Unicode Property/Script.<br>
+* 2.x supports Ruby1.6/1.8.<br>
+
+<br>
+<dt><b>Documents:</b> (version 5.9.0)
+<ul>
+ <li> <a href="doc/RE.txt">Regular Expressions</a>
+ <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
+ <li> <a href="doc/API.txt">Oniguruma API</a>
+ <a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
+</ul>
+
+<br>
+<dt><b>Sample Programs:</b>
+<ul>
+ <li><a href="sample/simple.c">example of the minimum</a>
+ <li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
+</ul>
+
+<br>
+<dt><b>Site Links:</b>
+<ul>
+<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
+<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt; Oniguruma</a> (Japanese page)
+</ul>
+
+<br>
+<dt><b>Links:</b>
+<ul>
+<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page)
+<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
+<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page)
+<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
+<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
+<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
+<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page)
+<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
+<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
+<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page)
+<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
+<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page)
+<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
+<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page)
+<li> <a href="http://monaos.org/">Mona OS</a>
+<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page)
+<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
+<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
+<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
+<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
+<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
+<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
+<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
+<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
+<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
+<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
+<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
+<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
+<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
+<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
+<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
+<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
+<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
+<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
+<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
+<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
+<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
+<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
+</ul>
+
+<br>
+<dt><b>References:</b>
+<ul>
+<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
+<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
+<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
+<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
+<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
+<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
+<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
+<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
+</ul>
+
+<br>
+</dl>
+<p>
+I am also thankful to Akinori MUSHA.
+</p>
+
+<hr>
+<dl>
+<dt><b>Other Libraries:</b>
+<ul>
+<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
+<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
+<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
+<li> <a href="http://www.pcre.org/">PCRE</a>
+<li> <a href="http://re2c.org/">re2c</a>
+<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
+<li> <a href="http://laurikari.net/tre/">TRE</a>
+<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
+<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
+<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
+<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
+</ul>
+</dl>
+
+<hr>
+<a href="../">Back to Home</a>
+</body>
+</html>
diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h
new file mode 100644
index 0000000..d9b1419
--- /dev/null
+++ b/ext/mbstring/oniguruma/onigcmpt200.h
@@ -0,0 +1,310 @@
+/**********************************************************************
+
+ onigcmpt200.h - Oniguruma (regular expression library)
+
+ Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
+
+**********************************************************************/
+#ifndef ONIGCMPT200_H
+#define ONIGCMPT200_H
+
+/* constants */
+#define REG_MAX_ERROR_MESSAGE_LEN ONIG_MAX_ERROR_MESSAGE_LEN
+
+#define RegCharEncoding OnigEncoding
+
+#define REG_ENCODING_ASCII ONIG_ENCODING_ASCII
+#define REG_ENCODING_ISO_8859_1 ONIG_ENCODING_ISO_8859_1
+#define REG_ENCODING_ISO_8859_15 ONIG_ENCODING_ISO_8859_15
+#define REG_ENCODING_UTF8 ONIG_ENCODING_UTF8
+#define REG_ENCODING_EUC_JP ONIG_ENCODING_EUC_JP
+#define REG_ENCODING_SJIS ONIG_ENCODING_SJIS
+#define REG_ENCODING_BIG5 ONIG_ENCODING_BIG5
+#define REG_ENCODING_UNDEF ONIG_ENCODING_UNDEF
+
+/* Don't use REGCODE_XXXX. (obsoleted) */
+#define REGCODE_UNDEF REG_ENCODING_UNDEF
+#define REGCODE_ASCII REG_ENCODING_ASCII
+#define REGCODE_UTF8 REG_ENCODING_UTF8
+#define REGCODE_EUCJP REG_ENCODING_EUC_JP
+#define REGCODE_SJIS REG_ENCODING_SJIS
+
+/* Don't use MBCTYPE_XXXX. (obsoleted) */
+#define MBCTYPE_ASCII RE_MBCTYPE_ASCII
+#define MBCTYPE_EUC RE_MBCTYPE_EUC
+#define MBCTYPE_SJIS RE_MBCTYPE_SJIS
+#define MBCTYPE_UTF8 RE_MBCTYPE_UTF8
+
+typedef unsigned char* RegTransTableType;
+#define RegOptionType OnigOptionType
+#define RegDistance OnigDistance
+
+#define REG_OPTION_DEFAULT ONIG_OPTION_DEFAULT
+
+/* options */
+#define REG_OPTION_NONE ONIG_OPTION_NONE
+#define REG_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define REG_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define REG_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define REG_OPTION_EXTEND ONIG_OPTION_EXTEND
+#define REG_OPTION_FIND_LONGEST ONIG_OPTION_FIND_LONGEST
+#define REG_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
+#define REG_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
+#define REG_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
+#define REG_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
+#define REG_OPTION_NOTBOL ONIG_OPTION_NOTBOL
+#define REG_OPTION_NOTEOL ONIG_OPTION_NOTEOL
+#define REG_OPTION_POSIX_REGION ONIG_OPTION_POSIX_REGION
+
+#define REG_OPTION_ON ONIG_OPTION_ON
+#define REG_OPTION_OFF ONIG_OPTION_OFF
+#define IS_REG_OPTION_ON ONIG_IS_OPTION_ON
+
+/* syntax */
+#define RegSyntaxType OnigSyntaxType
+
+#define RegSyntaxPosixBasic OnigSyntaxPosixBasic
+#define RegSyntaxPosixExtended OnigSyntaxPosixExtended
+#define RegSyntaxEmacs OnigSyntaxEmacs
+#define RegSyntaxGrep OnigSyntaxGrep
+#define RegSyntaxGnuRegex OnigSyntaxGnuRegex
+#define RegSyntaxJava OnigSyntaxJava
+#define RegSyntaxPerl OnigSyntaxPerl
+#define RegSyntaxRuby OnigSyntaxRuby
+
+#define REG_SYNTAX_POSIX_BASIC ONIG_SYNTAX_POSIX_BASIC
+#define REG_SYNTAX_POSIX_EXTENDED ONIG_SYNTAX_POSIX_EXTENDED
+#define REG_SYNTAX_EMACS ONIG_SYNTAX_EMACS
+#define REG_SYNTAX_GREP ONIG_SYNTAX_GREP
+#define REG_SYNTAX_GNU_REGEX ONIG_SYNTAX_GNU_REGEX
+#define REG_SYNTAX_JAVA ONIG_SYNTAX_JAVA
+#define REG_SYNTAX_PERL ONIG_SYNTAX_PERL
+#define REG_SYNTAX_RUBY ONIG_SYNTAX_RUBY
+
+#define REG_SYNTAX_DEFAULT ONIG_SYNTAX_DEFAULT
+#define RegDefaultSyntax OnigDefaultSyntax
+
+/* syntax (operators) */
+#define REG_SYN_OP_VARIABLE_META_CHARACTERS \
+ ONIG_SYN_OP_VARIABLE_META_CHARACTERS
+#define REG_SYN_OP_DOT_ANYCHAR \
+ ONIG_SYN_OP_DOT_ANYCHAR
+#define REG_SYN_OP_ASTERISK_ZERO_INF \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF
+#define REG_SYN_OP_ESC_ASTERISK_ZERO_INF \
+ ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
+#define REG_SYN_OP_PLUS_ONE_INF \
+ ONIG_SYN_OP_PLUS_ONE_INF
+#define REG_SYN_OP_ESC_PLUS_ONE_INF \
+ ONIG_SYN_OP_ESC_PLUS_ONE_INF
+#define REG_SYN_OP_QMARK_ZERO_ONE \
+ ONIG_SYN_OP_QMARK_ZERO_ONE
+#define REG_SYN_OP_ESC_QMARK_ZERO_ONE \
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
+#define REG_SYN_OP_BRACE_INTERVAL \
+ ONIG_SYN_OP_BRACE_INTERVAL
+#define REG_SYN_OP_ESC_BRACE_INTERVAL \
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL
+#define REG_SYN_OP_VBAR_ALT \
+ ONIG_SYN_OP_VBAR_ALT
+#define REG_SYN_OP_ESC_VBAR_ALT \
+ ONIG_SYN_OP_ESC_VBAR_ALT
+#define REG_SYN_OP_LPAREN_SUBEXP \
+ ONIG_SYN_OP_LPAREN_SUBEXP
+#define REG_SYN_OP_ESC_LPAREN_SUBEXP \
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP
+#define REG_SYN_OP_ESC_AZ_BUF_ANCHOR \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
+#define REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR \
+ ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
+#define REG_SYN_OP_DECIMAL_BACKREF \
+ ONIG_SYN_OP_DECIMAL_BACKREF
+#define REG_SYN_OP_BRACKET_CC \
+ ONIG_SYN_OP_BRACKET_CC
+#define REG_SYN_OP_ESC_W_WORD \
+ ONIG_SYN_OP_ESC_W_WORD
+#define REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END \
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
+#define REG_SYN_OP_ESC_B_WORD_BOUND \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND
+#define REG_SYN_OP_ESC_S_WHITE_SPACE \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE
+#define REG_SYN_OP_ESC_D_DIGIT \
+ ONIG_SYN_OP_ESC_D_DIGIT
+#define REG_SYN_OP_LINE_ANCHOR \
+ ONIG_SYN_OP_LINE_ANCHOR
+#define REG_SYN_OP_POSIX_BRACKET \
+ ONIG_SYN_OP_POSIX_BRACKET
+#define REG_SYN_OP_QMARK_NON_GREEDY \
+ ONIG_SYN_OP_QMARK_NON_GREEDY
+#define REG_SYN_OP_ESC_CONTROL_CHARS \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS
+#define REG_SYN_OP_ESC_C_CONTROL \
+ ONIG_SYN_OP_ESC_C_CONTROL
+#define REG_SYN_OP_ESC_OCTAL3 \
+ ONIG_SYN_OP_ESC_OCTAL3
+#define REG_SYN_OP_ESC_X_HEX2 \
+ ONIG_SYN_OP_ESC_X_HEX2
+#define REG_SYN_OP_ESC_X_BRACE_HEX8 \
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8
+
+#define REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE \
+ ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
+#define REG_SYN_OP2_QMARK_GROUP_EFFECT \
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT
+#define REG_SYN_OP2_OPTION_PERL \
+ ONIG_SYN_OP2_OPTION_PERL
+#define REG_SYN_OP2_OPTION_RUBY \
+ ONIG_SYN_OP2_OPTION_RUBY
+#define REG_SYN_OP2_PLUS_POSSESSIVE_REPEAT \
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
+#define REG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL \
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
+#define REG_SYN_OP2_CCLASS_SET_OP \
+ ONIG_SYN_OP2_CCLASS_SET_OP
+#define REG_SYN_OP2_QMARK_LT_NAMED_GROUP \
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
+#define REG_SYN_OP2_ESC_K_NAMED_BACKREF \
+ ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
+#define REG_SYN_OP2_ESC_G_SUBEXP_CALL \
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
+#define REG_SYN_OP2_ATMARK_CAPTURE_HISTORY \
+ ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
+#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL \
+ ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
+#define REG_SYN_OP2_ESC_CAPITAL_M_BAR_META \
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
+#define REG_SYN_OP2_ESC_V_VTAB \
+ ONIG_SYN_OP2_ESC_V_VTAB
+#define REG_SYN_OP2_ESC_U_HEX4 \
+ ONIG_SYN_OP2_ESC_U_HEX4
+#define REG_SYN_OP2_ESC_GNU_BUF_ANCHOR \
+ ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+
+#define REG_SYN_CONTEXT_INDEP_ANCHORS \
+ ONIG_SYN_CONTEXT_INDEP_ANCHORS
+#define REG_SYN_CONTEXT_INDEP_REPEAT_OPS \
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
+#define REG_SYN_CONTEXT_INVALID_REPEAT_OPS \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
+#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP \
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
+#define REG_SYN_ALLOW_INVALID_INTERVAL \
+ ONIG_SYN_ALLOW_INVALID_INTERVAL
+#define REG_SYN_STRICT_CHECK_BACKREF \
+ ONIG_SYN_STRICT_CHECK_BACKREF
+#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND \
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
+#define REG_SYN_CAPTURE_ONLY_NAMED_GROUP \
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
+#define REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME \
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
+
+#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC \
+ ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
+#define REG_SYN_BACKSLASH_ESCAPE_IN_CC \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
+#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC \
+ ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+#define REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC \
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
+#define REG_SYN_WARN_CC_OP_NOT_ESCAPED \
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
+#define REG_SYN_WARN_REDUNDANT_NESTED_REPEAT \
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
+
+/* meta character specifiers (regex_set_meta_char()) */
+#define REG_META_CHAR_ESCAPE ONIG_META_CHAR_ESCAPE
+#define REG_META_CHAR_ANYCHAR ONIG_META_CHAR_ANYCHAR
+#define REG_META_CHAR_ANYTIME ONIG_META_CHAR_ANYTIME
+#define REG_META_CHAR_ZERO_OR_ONE_TIME ONIG_META_CHAR_ZERO_OR_ONE_TIME
+#define REG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ONE_OR_MORE_TIME
+#define REG_META_CHAR_ANYCHAR_ANYTIME ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+#define REG_INEFFECTIVE_META_CHAR ONIG_INEFFECTIVE_META_CHAR
+
+/* error codes */
+#define REG_IS_PATTERN_ERROR ONIG_IS_PATTERN_ERROR
+/* normal return */
+#define REG_NORMAL ONIG_NORMAL
+#define REG_MISMATCH ONIG_MISMATCH
+#define REG_NO_SUPPORT_CONFIG ONIG_NO_SUPPORT_CONFIG
+/* internal error */
+#define REGERR_MEMORY ONIGERR_MEMORY
+#define REGERR_MATCH_STACK_LIMIT_OVER ONIGERR_MATCH_STACK_LIMIT_OVER
+#define REGERR_TYPE_BUG ONIGERR_TYPE_BUG
+#define REGERR_PARSER_BUG ONIGERR_PARSER_BUG
+#define REGERR_STACK_BUG ONIGERR_STACK_BUG
+#define REGERR_UNDEFINED_BYTECODE ONIGERR_UNDEFINED_BYTECODE
+#define REGERR_UNEXPECTED_BYTECODE ONIGERR_UNEXPECTED_BYTECODE
+#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED \
+ ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
+#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR \
+ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR
+/* general error */
+#define REGERR_INVALID_ARGUMENT ONIGERR_INVALID_ARGUMENT
+/* errors related to thread */
+#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT \
+ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (sizeof(unsigned int) * 8) */
+#define REG_MAX_CAPTURE_HISTORY_GROUP ONIG_MAX_CAPTURE_HISTORY_GROUP
+#define REG_IS_CAPTURE_HISTORY_GROUP ONIG_IS_CAPTURE_HISTORY_GROUP
+
+#define REG_REGION_NOTPOS ONIG_REGION_NOTPOS
+
+#define RegRegion OnigRegion
+#define RegErrorInfo OnigErrorInfo
+#define RegRepeatRange OnigRepeatRange
+
+#define RegWarnFunc OnigWarnFunc
+#define regex_null_warn onig_null_warn
+#define REG_NULL_WARN ONIG_NULL_WARN
+
+/* regex_t state */
+#define REG_STATE_NORMAL ONIG_STATE_NORMAL
+#define REG_STATE_SEARCHING ONIG_STATE_SEARCHING
+#define REG_STATE_COMPILING ONIG_STATE_COMPILING
+#define REG_STATE_MODIFY ONIG_STATE_MODIFY
+
+#define REG_STATE ONIG_STATE
+
+/* Oniguruma Native API */
+#define regex_init onig_init
+#define regex_error_code_to_str onig_error_code_to_str
+#define regex_set_warn_func onig_set_warn_func
+#define regex_set_verb_warn_func onig_set_verb_warn_func
+#define regex_new onig_new
+#define regex_free onig_free
+#define regex_recompile onig_recompile
+#define regex_search onig_search
+#define regex_match onig_match
+#define regex_region_new onig_region_new
+#define regex_region_free onig_region_free
+#define regex_region_copy onig_region_copy
+#define regex_region_clear onig_region_clear
+#define regex_region_resize onig_region_resize
+#define regex_name_to_group_numbers onig_name_to_group_numbers
+#define regex_name_to_backref_number onig_name_to_backref_number
+#define regex_foreach_name onig_foreach_name
+#define regex_number_of_names onig_number_of_names
+#define regex_get_encoding onig_get_encoding
+#define regex_get_options onig_get_options
+#define regex_get_syntax onig_get_syntax
+#define regex_set_default_syntax onig_set_default_syntax
+#define regex_copy_syntax onig_copy_syntax
+#define regex_set_meta_char onig_set_meta_char
+#define regex_end onig_end
+#define regex_version onig_version
+
+/* encoding API */
+#define enc_get_prev_char_head onigenc_get_prev_char_head
+#define enc_get_left_adjust_char_head onigenc_get_left_adjust_char_head
+#define enc_get_right_adjust_char_head onigenc_get_right_adjust_char_head
+/* obsoleted API */
+#define regex_get_prev_char_head onigenc_get_prev_char_head
+#define regex_get_left_adjust_char_head onigenc_get_left_adjust_char_head
+#define regex_get_right_adjust_char_head onigenc_get_right_adjust_char_head
+
+#endif /* ONIGCMPT200_H */
diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h
new file mode 100644
index 0000000..3da9f23
--- /dev/null
+++ b/ext/mbstring/oniguruma/oniggnu.h
@@ -0,0 +1,85 @@
+#ifndef ONIGGNU_H
+#define ONIGGNU_H
+/**********************************************************************
+ oniggnu.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "oniguruma.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RE_MBCTYPE_ASCII 0
+#define RE_MBCTYPE_EUC 1
+#define RE_MBCTYPE_SJIS 2
+#define RE_MBCTYPE_UTF8 3
+
+/* GNU regex options */
+#ifndef RE_NREGS
+#define RE_NREGS ONIG_NREGION
+#endif
+
+#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
+#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
+#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
+#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
+#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
+#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
+#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
+
+
+ONIG_EXTERN
+void re_mbcinit P_((int));
+ONIG_EXTERN
+int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
+ONIG_EXTERN
+void re_free_pattern P_((struct re_pattern_buffer*));
+ONIG_EXTERN
+int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
+ONIG_EXTERN
+int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
+ONIG_EXTERN
+int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
+ONIG_EXTERN
+void re_set_casetable P_((const char*));
+ONIG_EXTERN
+void re_free_registers P_((struct re_registers*));
+ONIG_EXTERN
+int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ONIGGNU_H */
diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h
new file mode 100644
index 0000000..cfeb88a
--- /dev/null
+++ b/ext/mbstring/oniguruma/onigposix.h
@@ -0,0 +1,169 @@
+#ifndef ONIGPOSIX_H
+#define ONIGPOSIX_H
+/**********************************************************************
+ onigposix.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* options: each flag is a distinct bit, so flags can be OR-ed together */
+#define REG_ICASE (1<<0)
+#define REG_NEWLINE (1<<1)
+#define REG_NOTBOL (1<<2)
+#define REG_NOTEOL (1<<3)
+#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression syntax is used */
+#define REG_NOSUB (1<<5)
+
+/* POSIX error codes */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+
+/* extended error codes */
+#define REG_EONIG_INTERNAL 14
+#define REG_EONIG_BADWC 15
+#define REG_EONIG_BADARG 16
+#define REG_EONIG_THREAD 17
+
+/* character encodings (for reg_set_encoding()) */
+#define REG_POSIX_ENCODING_ASCII 0
+#define REG_POSIX_ENCODING_EUC_JP 1
+#define REG_POSIX_ENCODING_SJIS 2
+#define REG_POSIX_ENCODING_UTF8 3
+#define REG_POSIX_ENCODING_UTF16_BE 4
+#define REG_POSIX_ENCODING_UTF16_LE 5
+
+
+typedef int regoff_t;
+
+typedef struct { /* one match span; regexec() takes an array of these as `matches` */
+ regoff_t rm_so; /* start offset of the match (POSIX rm_so) - presumably in bytes, TODO confirm */
+ regoff_t rm_eo; /* end offset of the match (POSIX rm_eo) - presumably one past the last byte, TODO confirm */
+} regmatch_t;
+
+/* POSIX regex_t: wrapper object handed to regcomp()/regexec()/regfree()/regerror() */
+typedef struct {
+ void* onig; /* Oniguruma regex_t* */
+ size_t re_nsub; /* POSIX field: number of parenthesized subexpressions - NOTE(review): confirm it is filled by regcomp() */
+ int comp_options; /* presumably the options passed to regcomp() - TODO confirm in the POSIX wrapper source */
+} regex_t;
+
+
+#ifndef P_
+#if defined(__STDC__) || defined(_WIN32)
+# define P_(args) args
+#else
+# define P_(args) ()
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#if defined(_WIN32) && !defined(__GNUC__)
+#if defined(EXPORT) || defined(RUBY_EXPORT)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
+#endif
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
+#endif
+
+#ifndef ONIGURUMA_H
+typedef unsigned int OnigOptionType;
+
+/* syntax: declared here only when oniguruma.h has not been included (see the enclosing #ifndef ONIGURUMA_H) */
+typedef struct {
+ unsigned int op; /* presumably a mask of ONIG_SYN_OP_* flags - TODO confirm */
+ unsigned int op2; /* presumably a mask of ONIG_SYN_OP2_* flags - TODO confirm */
+ unsigned int behavior; /* presumably a mask of the remaining ONIG_SYN_* flags - TODO confirm */
+ OnigOptionType options; /* default option */
+} OnigSyntaxType;
+
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
+/* default syntax */
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
+
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
+
+ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
+ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
+ONIG_EXTERN const char* onig_version P_((void));
+ONIG_EXTERN const char* onig_copyright P_((void));
+
+#endif /* ONIGURUMA_H */
+
+
+ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
+ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
+ONIG_EXTERN void regfree P_((regex_t* reg));
+ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
+
+/* extended API */
+ONIG_EXTERN void reg_set_encoding P_((int enc));
+ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
+ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
+ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ONIGPOSIX_H */
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h
new file mode 100644
index 0000000..5196a3d
--- /dev/null
+++ b/ext/mbstring/oniguruma/oniguruma.h
@@ -0,0 +1,905 @@
+#ifndef ONIGURUMA_H
+#define ONIGURUMA_H
+/**********************************************************************
+ oniguruma.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "../php_onig_compat.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ONIGURUMA
+#define ONIGURUMA_VERSION_MAJOR 4
+#define ONIGURUMA_VERSION_MINOR 7
+#define ONIGURUMA_VERSION_TEENY 1
+
+#ifdef __cplusplus
+# ifndef HAVE_PROTOTYPES
+# define HAVE_PROTOTYPES 1
+# endif
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+#ifndef P_
+#if defined(__STDC__) || defined(_WIN32)
+# define P_(args) args
+#else
+# define P_(args) ()
+#endif
+#endif
+
+#ifndef PV_
+#ifdef HAVE_STDARG_PROTOTYPES
+# define PV_(args) args
+#else
+# define PV_(args) ()
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#if defined(_WIN32) && !defined(__GNUC__)
+#if defined(EXPORT) || defined(RUBY_EXPORT)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
+#endif
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
+#endif
+
+/* PART: character encoding */
+
+#ifndef ONIG_ESCAPE_UCHAR_COLLISION
+#define UChar OnigUChar
+#endif
+
+typedef unsigned char OnigUChar;
+typedef unsigned long OnigCodePoint;
+typedef unsigned int OnigDistance;
+/* all bits set, i.e. the largest OnigDistance value; parenthesized so it stays one operand wherever it expands */
+#define ONIG_INFINITE_DISTANCE (~((OnigDistance )0))
+
+/* ambiguous match flag */
+typedef unsigned int OnigAmbigType;
+
+ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
+
+#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
+#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
+#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
+
+#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
+
+#define ONIGENC_AMBIGUOUS_MATCH_FULL \
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE )
+#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag
+
+
+#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
+#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
+
+/* code range: range[0] is the pair count; pair i is stored at range[2*i+1] (from) and range[2*i+2] (to) */
+#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
+#define ONIGENC_CODE_RANGE_FROM(range,i) ((range)[((i)*2) + 1])
+#define ONIGENC_CODE_RANGE_TO(range,i) ((range)[((i)*2) + 2])
+
+typedef struct {
+ int len;
+ OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
+} OnigCompAmbigCodeItem;
+
+typedef struct {
+ int n;
+ OnigCodePoint code;
+ OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
+} OnigCompAmbigCodes;
+
+typedef struct {
+ OnigCodePoint from;
+ OnigCodePoint to;
+} OnigPairAmbigCodes;
+
+typedef struct {
+ OnigCodePoint esc;
+ OnigCodePoint anychar;
+ OnigCodePoint anytime;
+ OnigCodePoint zero_or_one_time;
+ OnigCodePoint one_or_more_time;
+ OnigCodePoint anychar_anytime;
+} OnigMetaCharTableType;
+
+
+#if defined(RUBY_PLATFORM) && defined(M17N_H)
+
+#define ONIG_RUBY_M17N
+typedef m17n_encoding* OnigEncoding;
+
+#else
+
+typedef struct {
+ int (*mbc_enc_len)(const OnigUChar* p);
+ const char* name;
+ int max_enc_len;
+ int min_enc_len;
+ OnigAmbigType support_ambig_flag;
+ OnigMetaCharTableType meta_char_table;
+ int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
+ OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
+ int (*code_to_mbclen)(OnigCodePoint code);
+ int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
+ int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
+ int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
+ int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
+ int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
+ int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
+ int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
+ OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
+ int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
+} OnigEncodingType;
+
+typedef OnigEncodingType* OnigEncoding;
+
+ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
+ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
+ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
+ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
+ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
+
+#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
+#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
+#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
+#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
+#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
+#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
+#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
+#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
+#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
+#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
+#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
+#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
+#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
+#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
+#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
+#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
+#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
+#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
+#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
+#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
+#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
+#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
+#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
+#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
+#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
+#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
+#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
+#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
+#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
+
+#endif /* else RUBY && M17N */
+
+#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+
+
+/* work size */
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
+
+/* character types */
+#define ONIGENC_CTYPE_NEWLINE (1<< 0)
+#define ONIGENC_CTYPE_ALPHA (1<< 1)
+#define ONIGENC_CTYPE_BLANK (1<< 2)
+#define ONIGENC_CTYPE_CNTRL (1<< 3)
+#define ONIGENC_CTYPE_DIGIT (1<< 4)
+#define ONIGENC_CTYPE_GRAPH (1<< 5)
+#define ONIGENC_CTYPE_LOWER (1<< 6)
+#define ONIGENC_CTYPE_PRINT (1<< 7)
+#define ONIGENC_CTYPE_PUNCT (1<< 8)
+#define ONIGENC_CTYPE_SPACE (1<< 9)
+#define ONIGENC_CTYPE_UPPER (1<<10)
+#define ONIGENC_CTYPE_XDIGIT (1<<11)
+#define ONIGENC_CTYPE_WORD (1<<12)
+#define ONIGENC_CTYPE_ASCII (1<<13)
+#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
+
+#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p)
+
+#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
+#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
+#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
+#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
+#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
+#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
+ (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
+#define ONIGENC_IS_MBC_WORD(enc,s,end) \
+ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
+
+
+#ifdef ONIG_RUBY_M17N
+
+#include <ctype.h> /* for isblank(), isgraph() */
+
+#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
+ onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
+#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
+ onigenc_is_mbc_ambiguous(enc,flag,pp,end)
+
+#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ onigenc_is_allowed_reverse_match(enc, s, end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ onigenc_get_left_adjust_char_head(enc, start, s)
+#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
+#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
+ ONIG_NO_SUPPORT_CONFIG
+/* Passes the byte addressed by p to m17n_mbclen (ONIG_RUBY_M17N build).
+   The argument is parenthesized before dereferencing so that a pointer
+   expression such as s + 1 is dereferenced as a whole, not as (*s) + 1. */
+#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*(p)))
+#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) \
+ (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
+ : ONIG_INFINITE_DISTANCE)
+#define ONIGENC_MBC_MINLEN(enc) 1
+#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
+
+#if 0 /* !! not supported !! */
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
+#define ONIGENC_STEP_BACK(enc,start,s,n)
+#endif
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
+ onigenc_is_code_ctype(enc,code,ctype)
+
+#ifdef isblank
+# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
+#else
+# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
+#endif
+#ifdef isgraph
+# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
+#else
+# define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ (isprint((int )code) && !isspace((int )code))
+#endif
+
+#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
+#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
+#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
+#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
+#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
+
+ONIG_EXTERN
+int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
+ONIG_EXTERN
+int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf));
+ONIG_EXTERN
+int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf));
+ONIG_EXTERN
+int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end));
+ONIG_EXTERN
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
+
+#else /* ONIG_RUBY_M17N */
+
+#define ONIGENC_NAME(enc) ((enc)->name)
+
+#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
+ (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf)
+#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
+ (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end)
+#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ (enc)->is_allowed_reverse_match(s,end)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
+ (enc)->left_adjust_char_head(start, s)
+#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
+ (enc)->get_all_pair_ambig_codes(ambig_flag,acs)
+#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
+ (enc)->get_all_comp_ambig_codes(ambig_flag,acs)
+#define ONIGENC_STEP_BACK(enc,start,s,n) \
+ onigenc_step_back((enc),(start),(s),(n))
+
+#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
+#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
+#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
+#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
+
+#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
+#define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
+#define ONIGENC_IS_CODE_PRINT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
+#define ONIGENC_IS_CODE_LOWER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
+#define ONIGENC_IS_CODE_UPPER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
+#define ONIGENC_IS_CODE_SPACE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
+#define ONIGENC_IS_CODE_BLANK(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
+#define ONIGENC_IS_CODE_WORD(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
+
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
+ (enc)->get_ctype_code_range(ctype,sbr,mbr)
+
+ONIG_EXTERN
+OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
+
+#endif /* is not ONIG_RUBY_M17N */
+
+
+/* encoding API */
+ONIG_EXTERN
+int onigenc_init P_((void));
+ONIG_EXTERN
+int onigenc_set_default_encoding P_((OnigEncoding enc));
+ONIG_EXTERN
+OnigEncoding onigenc_get_default_encoding P_((void));
+ONIG_EXTERN
+void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
+ONIG_EXTERN
+OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
+ONIG_EXTERN
+OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
+ONIG_EXTERN
+int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
+ONIG_EXTERN
+int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
+ONIG_EXTERN
+int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
+
+
+
+/* PART: regular expression */
+
+/* config parameters */
+#define ONIG_NREGION 10
+#define ONIG_MAX_BACKREF_NUM 1000
+#define ONIG_MAX_REPEAT_NUM 100000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
+/* constants */
+#define ONIG_MAX_ERROR_MESSAGE_LEN 90
+
+typedef unsigned int OnigOptionType;
+
+#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
+
+/* options */
+#define ONIG_OPTION_NONE 0U
+#define ONIG_OPTION_IGNORECASE 1U
+#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
+#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
+#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
+#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
+#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
+#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
+/* options (search time) */
+#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
+#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
+#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
+
+#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
+#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
+#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
+
+/* syntax */
+/* Describes one regular-expression syntax.  The predefined instances are the
+   OnigSyntax* objects declared after this type.
+   NOTE(review): op, op2 and behavior are presumably bit sets built from the
+   ONIG_SYN_OP_*, ONIG_SYN_OP2_* and ONIG_SYN_* (behavior) flags defined later
+   in this header -- confirm against regsyntax.c. */
+typedef struct {
+ unsigned int op;
+ unsigned int op2;
+ unsigned int behavior;
+ OnigOptionType options; /* default option */
+} OnigSyntaxType;
+
+ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
+
+/* default syntax */
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
+
+/* syntax (operators) */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
+#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
+
+/* syntax (behavior) */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
+
+/* syntax (behavior) in char class [...] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+/* syntax (behavior) warning */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
+
+/* meta character specifiers (onig_set_meta_char()) */
+#define ONIG_META_CHAR_ESCAPE 0
+#define ONIG_META_CHAR_ANYCHAR 1
+#define ONIG_META_CHAR_ANYTIME 2
+#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
+#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
+#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
+
+#define ONIG_INEFFECTIVE_META_CHAR 0
+
+/* error codes */
+#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
+/* normal return */
+#define ONIG_NORMAL 0
+#define ONIG_MISMATCH -1
+#define ONIG_NO_SUPPORT_CONFIG -2
+
+/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
+#define ONIGERR_PARSER_BUG -11
+#define ONIGERR_STACK_BUG -12
+#define ONIGERR_UNDEFINED_BYTECODE -13
+#define ONIGERR_UNEXPECTED_BYTECODE -14
+#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
+#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+/* general error */
+#define ONIGERR_INVALID_ARGUMENT -30
+/* syntax error */
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
+#define ONIGERR_EMPTY_CHAR_CLASS -102
+#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
+#define ONIGERR_END_PATTERN_AT_ESCAPE -104
+#define ONIGERR_END_PATTERN_AT_META -105
+#define ONIGERR_END_PATTERN_AT_CONTROL -106
+#define ONIGERR_META_CODE_SYNTAX -108
+#define ONIGERR_CONTROL_CODE_SYNTAX -109
+#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
+#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
+#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
+#define ONIGERR_NESTED_REPEAT_OPERATOR -115
+#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
+#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
+#define ONIGERR_END_PATTERN_IN_GROUP -118
+#define ONIGERR_UNDEFINED_GROUP_OPTION -119
+#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
+#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
+#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
+/* values error (syntax error) */
+#define ONIGERR_TOO_BIG_NUMBER -200
+#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
+#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
+#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
+#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
+#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
+#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
+#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
+#define ONIGERR_INVALID_BACKREF -208
+#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
+#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
+#define ONIGERR_EMPTY_GROUP_NAME -214
+#define ONIGERR_INVALID_GROUP_NAME -215
+#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
+#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
+#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
+#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
+#define ONIGERR_NEVER_ENDING_RECURSION -221
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
+#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
+#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
+
+/* errors related to thread */
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (presumably the number of bits in
+   an unsigned int, i.e. sizeof(unsigned int) * 8) */
+#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
+/* Non-zero when i does not exceed ONIG_MAX_CAPTURE_HISTORY_GROUP and
+   (r)->list[i] is set.
+   NOTE(review): struct re_registers (OnigRegion) as declared in this header
+   has no `list` member -- confirm which type `r` is meant to be, or whether
+   this macro is stale. */
+#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
+ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
+
+/* One node of the capture history tree (see history_root in struct
+   re_registers and onig_get_capture_tree()).
+   NOTE(review): beg/end are presumably the matched range of the group, and
+   allocated/num_childs the capacity and used count of childs -- confirm. */
+typedef struct OnigCaptureTreeNodeStruct {
+ int group; /* group number */
+ int beg;
+ int end;
+ int allocated;
+ int num_childs;
+ struct OnigCaptureTreeNodeStruct** childs;
+} OnigCaptureTreeNode;
+
+/* match result region type */
+/* Aliased as OnigRegion further down in this header.
+   NOTE(review): beg[] and end[] presumably hold num_regs entries each, with
+   `allocated` being their capacity -- confirm against onig_region_resize(). */
+struct re_registers {
+ int allocated;
+ int num_regs;
+ int* beg;
+ int* end;
+ /* extended */
+ OnigCaptureTreeNode* history_root; /* capture history tree root */
+};
+
+/* capture tree traverse */
+#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
+#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
+#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
+ ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
+
+
+#define ONIG_REGION_NOTPOS -1
+
+typedef struct re_registers OnigRegion;
+
+/* Error detail filled in through the einfo argument of onig_new() and
+   onig_recompile().
+   NOTE(review): par/par_end presumably delimit the offending pattern text in
+   encoding enc -- confirm against onig_error_code_to_str(). */
+typedef struct {
+ OnigEncoding enc;
+ OnigUChar* par;
+ OnigUChar* par_end;
+} OnigErrorInfo;
+
+/* Lower/upper pair; an array of these is stored in the repeat_range member
+   of re_pattern_buffer. */
+typedef struct {
+ int lower;
+ int upper;
+} OnigRepeatRange;
+
+typedef void (*OnigWarnFunc) P_((const char* s));
+extern void onig_null_warn P_((const char* s));
+#define ONIG_NULL_WARN onig_null_warn
+
+#define ONIG_CHAR_TABLE_SIZE 256
+
+/* regex_t state */
+#define ONIG_STATE_NORMAL 0
+#define ONIG_STATE_SEARCHING 1
+#define ONIG_STATE_COMPILING -1
+#define ONIG_STATE_MODIFY -2
+
+#define ONIG_STATE(reg) \
+ ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
+
+/* Compiled regular expression object, exposed as OnigRegexType (and as
+   regex_t unless ONIG_ESCAPE_REGEX_T_COLLISION is defined); OnigRegex is a
+   pointer to it.  The first three members must stay first and in this order:
+   per the comment below they are the members shared with BBuf. */
+typedef struct re_pattern_buffer {
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int state; /* normal, searching, compiling */
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_comb_exp_check; /* combination explosion check */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level;
+ int repeat_range_alloc;
+ OnigRepeatRange* repeat_range;
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
+ OnigAmbigType ambig_flag;
+ void* name_table;
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact;
+ unsigned char *exact_end;
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigDistance dmin; /* min-distance of exact or map */
+ OnigDistance dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
+} OnigRegexType;
+
+typedef OnigRegexType* OnigRegex;
+
+#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
+ typedef OnigRegexType regex_t;
+#endif
+
+
+/* Compile parameters passed as the ci argument of onig_new_deluxe() and
+   onig_recompile_deluxe().
+   NOTE(review): num_of_elements presumably tells the library how many of the
+   following members the caller has filled in -- confirm against doc/API. */
+typedef struct {
+ int num_of_elements;
+ OnigEncoding pattern_enc;
+ OnigEncoding target_enc;
+ OnigSyntaxType* syntax;
+ OnigOptionType option;
+ OnigAmbigType ambig_flag;
+} OnigCompileInfo;
+
+/* Oniguruma Native API */
+ONIG_EXTERN
+int onig_init P_((void));
+ONIG_EXTERN
+int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
+ONIG_EXTERN
+void onig_set_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+void onig_set_verb_warn_func P_((OnigWarnFunc f));
+ONIG_EXTERN
+int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+ONIG_EXTERN
+void onig_free P_((OnigRegex));
+ONIG_EXTERN
+int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+OnigRegion* onig_region_new P_((void));
+ONIG_EXTERN
+void onig_region_init P_((OnigRegion* region));
+ONIG_EXTERN
+void onig_region_free P_((OnigRegion* region, int free_self));
+ONIG_EXTERN
+void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
+ONIG_EXTERN
+void onig_region_clear P_((OnigRegion* region));
+ONIG_EXTERN
+int onig_region_resize P_((OnigRegion* region, int n));
+ONIG_EXTERN
+int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
+ONIG_EXTERN
+int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
+ONIG_EXTERN
+int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
+ONIG_EXTERN
+int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
+ONIG_EXTERN
+int onig_number_of_names P_((OnigRegex reg));
+ONIG_EXTERN
+int onig_number_of_captures P_((OnigRegex reg));
+ONIG_EXTERN
+int onig_number_of_capture_histories P_((OnigRegex reg));
+ONIG_EXTERN
+OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
+ONIG_EXTERN
+int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
+ONIG_EXTERN
+int onig_noname_group_capture_is_active P_((OnigRegex reg));
+ONIG_EXTERN
+OnigEncoding onig_get_encoding P_((OnigRegex reg));
+ONIG_EXTERN
+OnigOptionType onig_get_options P_((OnigRegex reg));
+ONIG_EXTERN
+OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg));
+ONIG_EXTERN
+OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
+ONIG_EXTERN
+int onig_set_default_syntax P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
+ONIG_EXTERN
+unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
+ONIG_EXTERN
+void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
+ONIG_EXTERN
+void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
+ONIG_EXTERN
+void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
+ONIG_EXTERN
+int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
+ONIG_EXTERN
+void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
+ONIG_EXTERN
+OnigAmbigType onig_get_default_ambig_flag P_((void));
+ONIG_EXTERN
+int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
+ONIG_EXTERN
+unsigned int onig_get_match_stack_limit_size P_((void));
+ONIG_EXTERN
+int onig_set_match_stack_limit_size P_((unsigned int size));
+ONIG_EXTERN
+int onig_end P_((void));
+ONIG_EXTERN
+const char* onig_version P_((void));
+ONIG_EXTERN
+const char* onig_copyright P_((void));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ONIGURUMA_H */
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
new file mode 100644
index 0000000..6a0976d
--- /dev/null
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -0,0 +1,6044 @@
+/**********************************************************************
+ regcomp.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regparse.h"
+
+OnigAmbigType OnigDefaultAmbigFlag =
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
+
+/* Returns the current value of the global OnigDefaultAmbigFlag. */
+extern OnigAmbigType
+onig_get_default_ambig_flag(void)
+{
+ return OnigDefaultAmbigFlag;
+}
+
+/* Stores ambig_flag in the global OnigDefaultAmbigFlag without validating it.
+   Always returns 0. */
+extern int
+onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+{
+ OnigDefaultAmbigFlag = ambig_flag;
+ return 0;
+}
+
+
+/* Duplicates the byte range [s, end) into a freshly allocated buffer and
+   appends a terminating zero byte.  Returns NULL when the range is empty
+   (end <= s); CHECK_NULL_RETURN handles an allocation failure (macro defined
+   outside this view, presumably returning NULL).  The caller owns the
+   returned buffer. */
+static UChar*
+k_strdup(UChar* s, UChar* end)
+{
+  UChar* copy;
+  int n = end - s;
+
+  if (n <= 0) return NULL;
+
+  copy = (UChar* )xmalloc(n + 1);
+  CHECK_NULL_RETURN(copy);
+  xmemcpy(copy, s, n);
+  copy[n] = (UChar )0;
+  return copy;
+}
+
+/*
+  Exchanges the contents of two nodes by whole-struct copy.
+  Caution: neither node should be a string node
+  (the s and end member addresses would break).
+*/
+static void
+swap_node(Node* a, Node* b)
+{
+  Node tmp = *a;
+
+  *a = *b;
+  *b = tmp;
+}
+
+/* Saturating addition: returns d1 + d2, or ONIG_INFINITE_DISTANCE when
+   either operand is already infinite or the sum would exceed it. */
+static OnigDistance
+distance_add(OnigDistance d1, OnigDistance d2)
+{
+  if (d1 != ONIG_INFINITE_DISTANCE && d2 != ONIG_INFINITE_DISTANCE &&
+      d1 <= ONIG_INFINITE_DISTANCE - d2)
+    return d1 + d2;
+
+  return ONIG_INFINITE_DISTANCE;
+}
+
+/* Saturating multiplication: returns 0 when m is 0, d * m when d is strictly
+   below ONIG_INFINITE_DISTANCE / m, and ONIG_INFINITE_DISTANCE otherwise. */
+static OnigDistance
+distance_multiply(OnigDistance d, int m)
+{
+  if (m == 0) return 0;
+
+  return (d < ONIG_INFINITE_DISTANCE / m) ? d * m : ONIG_INFINITE_DISTANCE;
+}
+
+/* Returns 1 when all BITSET_SIZE elements of bs are zero, 0 as soon as a
+   non-zero element is found. */
+static int
+bitset_is_empty(BitSetRef bs)
+{
+  int idx = 0;
+
+  while (idx < BITSET_SIZE) {
+    if (bs[idx] != 0) return 0;
+    idx++;
+  }
+  return 1;
+}
+
+#ifdef ONIG_DEBUG
+/* Debug helper: counts the positions in [0, SINGLE_BYTE_SIZE) for which
+   BITSET_AT(bs, pos) is non-zero. */
+static int
+bitset_on_num(BitSetRef bs)
+{
+  int pos;
+  int count = 0;
+
+  for (pos = 0; pos < SINGLE_BYTE_SIZE; pos++)
+    count += BITSET_AT(bs, pos) ? 1 : 0;
+
+  return count;
+}
+#endif
+
+/* Allocates size bytes for buf and marks the buffer as empty.
+   Returns 0 on success.  When the allocation yields NULL, buf->p is left
+   NULL, alloc/used are not touched, and ONIGERR_MEMORY is returned. */
+extern int
+onig_bbuf_init(BBuf* buf, int size)
+{
+  buf->p = (UChar* )xmalloc(size);
+
+  if (IS_NOT_NULL(buf->p)) {
+    buf->used  = 0;
+    buf->alloc = size;
+    return 0;
+  }
+
+  return(ONIGERR_MEMORY);
+}
+
+
+#ifdef USE_SUBEXP_CALL
+
+/* Allocates room for size UnsetAddr entries and resets uslist to empty.
+   Returns 0 on success; CHECK_NULL_RETURN_VAL yields ONIGERR_MEMORY when the
+   allocation fails, in which case uslist is not modified. */
+static int
+unset_addr_list_init(UnsetAddrList* uslist, int size)
+{
+  UnsetAddr* entries = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
+
+  CHECK_NULL_RETURN_VAL(entries, ONIGERR_MEMORY);
+  uslist->us    = entries;
+  uslist->alloc = size;
+  uslist->num   = 0;
+  return 0;
+}
+
+/* Releases the entry array of uslist, if any.  The uslist structure itself
+   is not freed and uslist->us is not reset afterwards. */
+static void
+unset_addr_list_end(UnsetAddrList* uslist)
+{
+ if (IS_NOT_NULL(uslist->us))
+ xfree(uslist->us);
+}
+
+/* Appends an (offset, target node) pair to uslist, growing the entry array
+   when it is full.
+   Returns 0 on success, or ONIGERR_MEMORY (via CHECK_NULL_RETURN_VAL) when
+   the array cannot be grown; in that case uslist is left unchanged. */
+static int
+unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
+{
+  UnsetAddr* p;
+  int size;
+
+  if (uslist->num >= uslist->alloc) {
+    /* Plain doubling never grows a list whose capacity is 0: it would request
+       a zero-sized block and the store below would run past the array.
+       Start from one entry in that case. */
+    size = (uslist->alloc > 0) ? uslist->alloc * 2 : 1;
+    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
+    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+    uslist->alloc = size;
+    uslist->us    = p;
+  }
+
+  uslist->us[uslist->num].offset = offset;
+  uslist->us[uslist->num].target = node;
+  uslist->num++;
+  return 0;
+}
+#endif /* USE_SUBEXP_CALL */
+
+
+/* Appends one opcode to reg's compiled-pattern buffer with BBUF_ADD1 and
+   returns 0.
+   NOTE(review): BBUF_ADD1 is defined outside this view; callers test the
+   result for non-zero, so it presumably returns early on allocation
+   failure -- confirm. */
+static int
+add_opcode(regex_t* reg, int opcode)
+{
+ BBUF_ADD1(reg, opcode);
+ return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+/* Converts num to StateCheckNumType and appends its SIZE_STATE_CHECK_NUM
+   bytes to reg's buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ return 0;
+}
+#endif
+
+/* Converts addr to RelAddrType and appends its SIZE_RELADDR bytes to reg's
+   buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_rel_addr(regex_t* reg, int addr)
+{
+ RelAddrType ra = (RelAddrType )addr;
+
+ BBUF_ADD(reg, &ra, SIZE_RELADDR);
+ return 0;
+}
+
+/* Converts addr to AbsAddrType and appends its SIZE_ABSADDR bytes to reg's
+   buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_abs_addr(regex_t* reg, int addr)
+{
+ AbsAddrType ra = (AbsAddrType )addr;
+
+ BBUF_ADD(reg, &ra, SIZE_ABSADDR);
+ return 0;
+}
+
+/* Converts len to LengthType and appends its SIZE_LENGTH bytes to reg's
+   buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_length(regex_t* reg, int len)
+{
+ LengthType l = (LengthType )len;
+
+ BBUF_ADD(reg, &l, SIZE_LENGTH);
+ return 0;
+}
+
+/* Converts num to MemNumType and appends its SIZE_MEMNUM bytes to reg's
+   buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_mem_num(regex_t* reg, int num)
+{
+ MemNumType n = (MemNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_MEMNUM);
+ return 0;
+}
+
+/* Converts addr to PointerType and appends its SIZE_POINTER bytes to reg's
+   buffer, i.e. the pointer value itself is embedded in the buffer.
+   Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_pointer(regex_t* reg, void* addr)
+{
+ PointerType ptr = (PointerType )addr;
+
+ BBUF_ADD(reg, &ptr, SIZE_POINTER);
+ return 0;
+}
+
+/* Appends SIZE_OPTION bytes taken from the option value to reg's buffer.
+   Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_option(regex_t* reg, OnigOptionType option)
+{
+ BBUF_ADD(reg, &option, SIZE_OPTION);
+ return 0;
+}
+
+/* Emits opcode followed by the relative address addr.
+   Returns the first non-zero result of add_opcode()/add_rel_addr(), else 0. */
+static int
+add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status != 0) return status;
+  return add_rel_addr(reg, addr);
+}
+
+/* Appends len bytes starting at bytes to reg's buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_bytes(regex_t* reg, UChar* bytes, int len)
+{
+ BBUF_ADD(reg, bytes, len);
+ return 0;
+}
+
+/* Appends SIZE_BITSET bytes read from bs to reg's buffer.  Returns 0.
+   NOTE(review): BBUF_ADD is defined outside this view -- confirm whether it
+   can return early on allocation failure. */
+static int
+add_bitset(regex_t* reg, BitSetRef bs)
+{
+ BBUF_ADD(reg, bs, SIZE_BITSET);
+ return 0;
+}
+
+/* Emits opcode followed by the option value.
+   Returns the first non-zero result of add_opcode()/add_option(), else 0. */
+static int
+add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status != 0) return status;
+  return add_option(reg, option);
+}
+
+static int compile_length_tree(Node* node, regex_t* reg);
+static int compile_tree(Node* node, regex_t* reg);
+
+
+#define IS_NEED_STR_LEN_OP_EXACT(op) \
+ ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
+ (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
+
+/*
+ * Choose the exact-match opcode for a run of `str_len` characters that
+ * are each `mb_len` bytes wide.
+ *
+ * When `ignore_case` is non-zero only str_len matters (1 -> OP_EXACT1_IC,
+ * anything else -> OP_EXACTN_IC).  Otherwise one-byte and two-byte runs
+ * get dedicated opcodes for short lengths and an "N" opcode for the rest;
+ * three-byte characters use OP_EXACTMB3N and all other widths use
+ * OP_EXACTMBN.
+ */
+static int
+select_str_opcode(int mb_len, int str_len, int ignore_case)
+{
+  if (ignore_case)
+    return (str_len == 1) ? OP_EXACT1_IC : OP_EXACTN_IC;
+
+  if (mb_len == 1) {
+    switch (str_len) {
+    case 1:  return OP_EXACT1;
+    case 2:  return OP_EXACT2;
+    case 3:  return OP_EXACT3;
+    case 4:  return OP_EXACT4;
+    case 5:  return OP_EXACT5;
+    default: return OP_EXACTN;
+    }
+  }
+
+  if (mb_len == 2) {
+    switch (str_len) {
+    case 1:  return OP_EXACTMB2N1;
+    case 2:  return OP_EXACTMB2N2;
+    case 3:  return OP_EXACTMB2N3;
+    default: return OP_EXACTMB2N;
+    }
+  }
+
+  if (mb_len == 3)
+    return OP_EXACTMB3N;
+
+  return OP_EXACTMBN;
+}
+
+/*
+ * Compile `node`, wrapping it in a null-check pair when `empty_info` is
+ * non-zero.
+ *
+ * With empty_info == 0 this is just compile_tree().  Otherwise it emits
+ * OP_NULL_CHECK_START with a fresh id (reg->num_null_check, which is then
+ * incremented), the body, the end opcode selected by `empty_info`, and
+ * the same id again.  Returns 0 or the first non-zero result.
+ */
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
+{
+  int r;
+  int check_id = reg->num_null_check;  /* id shared by START and END */
+
+  if (empty_info == 0)
+    return compile_tree(node, reg);
+
+  r = add_opcode(reg, OP_NULL_CHECK_START);
+  if (r != 0) return r;
+  r = add_mem_num(reg, check_id); /* NULL CHECK ID */
+  if (r != 0) return r;
+  reg->num_null_check++;
+
+  r = compile_tree(node, reg);
+  if (r != 0) return r;
+
+  /* An unrecognised empty_info emits no end opcode; r is still 0 here. */
+  if (empty_info == NQ_TARGET_IS_EMPTY)
+    r = add_opcode(reg, OP_NULL_CHECK_END);
+  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
+    r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
+  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
+    r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+  if (r != 0) return r;
+
+  return add_mem_num(reg, check_id); /* NULL CHECK ID */
+}
+
+#ifdef USE_SUBEXP_CALL
+/*
+ * Emit OP_CALL with a placeholder absolute address.
+ * The buffer position of the placeholder is recorded, together with
+ * node->target, in node->unset_addr_list.
+ * NOTE(review): presumably the real address is patched in later by code
+ * outside this chunk -- confirm.
+ */
+static int
+compile_call(CallNode* node, regex_t* reg)
+{
+  int r = add_opcode(reg, OP_CALL);
+
+  if (r == 0)
+    r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+                            node->target);
+  if (r == 0)
+    r = add_abs_addr(reg, 0 /*dummy addr.*/);
+  return r;
+}
+#endif
+
+/*
+ * Compile `node` `n` times in a row (nothing is emitted when n <= 0).
+ * Stops at, and returns, the first non-zero result; otherwise returns 0.
+ */
+static int
+compile_tree_n_times(Node* node, int n, regex_t* reg)
+{
+  int done = 0;
+
+  while (done < n) {
+    int r = compile_tree(node, reg);
+    if (r != 0)
+      return r;
+    done++;
+  }
+  return 0;
+}
+
+/*
+ * Number of bytes add_compile_string() emits for the same arguments:
+ * the opcode, an optional character-width field (OP_EXACTMBN only), an
+ * optional length field (opcodes accepted by IS_NEED_STR_LEN_OP_EXACT),
+ * and mb_len * str_len bytes of string data.
+ * `s` and `reg` are not used.
+ */
+static int
+add_compile_string_length(UChar* s, int mb_len, int str_len,
+                          regex_t* reg, int ignore_case)
+{
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+  int total = SIZE_OPCODE;
+
+  if (op == OP_EXACTMBN)
+    total += SIZE_LENGTH;
+  if (IS_NEED_STR_LEN_OP_EXACT(op))
+    total += SIZE_LENGTH;
+
+  total += mb_len * str_len;
+  return total;
+}
+
+/*
+ * Emit the code for one run of `str_len` characters of `mb_len` bytes
+ * each, starting at `s`.
+ *
+ * Layout: opcode, then mb_len (OP_EXACTMBN only), then a length field for
+ * opcodes accepted by IS_NEED_STR_LEN_OP_EXACT (a byte count for
+ * OP_EXACTN_IC, a character count otherwise), then the raw bytes.
+ *
+ * Returns 0 on success.  Unlike the previous version, which discarded the
+ * result of every append, the first non-zero result is now returned, as
+ * the other emitters in this file do.
+ */
+static int
+add_compile_string(UChar* s, int mb_len, int str_len,
+                   regex_t* reg, int ignore_case)
+{
+  int r;
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+
+  r = add_opcode(reg, op);
+  if (r) return r;
+
+  if (op == OP_EXACTMBN) {
+    r = add_length(reg, mb_len);
+    if (r) return r;
+  }
+
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+    if (op == OP_EXACTN_IC)
+      r = add_length(reg, mb_len * str_len);
+    else
+      r = add_length(reg, str_len);
+    if (r) return r;
+  }
+
+  return add_bytes(reg, s, mb_len * str_len);
+}
+
+
+/*
+ * Length pass for a non-raw string node.
+ *
+ * The string is split into maximal runs of characters that share one
+ * encoded width (enc_len); each run is sized with
+ * add_compile_string_length() and the sizes are summed.
+ * An empty string yields 0.
+ */
+static int
+compile_length_string_node(Node* node, regex_t* reg)
+{
+  int total, cur_len, run_mb_len, run_chars, ambig;
+  OnigEncoding enc = reg->enc;
+  UChar *p, *run_start;
+  StrNode* sn = &(NSTRING(node));
+
+  if (sn->end <= sn->s)
+    return 0;
+
+  ambig = NSTRING_IS_AMBIG(node);
+
+  /* the first character opens the first run */
+  run_start = sn->s;
+  run_mb_len = enc_len(enc, run_start);
+  p = run_start + run_mb_len;
+  run_chars = 1;
+  total = 0;
+
+  while (p < sn->end) {
+    cur_len = enc_len(enc, p);
+    if (cur_len != run_mb_len) {
+      /* width changed: account for the finished run, start a new one */
+      total += add_compile_string_length(run_start, run_mb_len, run_chars,
+                                         reg, ambig);
+      run_start = p;
+      run_chars = 1;
+      run_mb_len = cur_len;
+    }
+    else {
+      run_chars++;
+    }
+    p += cur_len;
+  }
+
+  total += add_compile_string_length(run_start, run_mb_len, run_chars,
+                                     reg, ambig);
+  return total;
+}
+
+/*
+ * Length pass for a raw string node: the bytes are sized as one run of
+ * single-byte characters, case-sensitive.  An empty string yields 0.
+ */
+static int
+compile_length_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->s < sn->end)
+    return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s,
+                                     reg, 0);
+  return 0;
+}
+
+/*
+ * Emit code for a non-raw string node.
+ *
+ * The string is split into maximal runs of characters that share one
+ * encoded width (enc_len) and each run is emitted with
+ * add_compile_string().  Returns 0 for an empty string or on success,
+ * otherwise the first non-zero result.
+ */
+static int
+compile_string_node(Node* node, regex_t* reg)
+{
+  int r, cur_len, run_mb_len, run_chars, ambig;
+  OnigEncoding enc = reg->enc;
+  UChar *p, *run_start, *end;
+  StrNode* sn = &(NSTRING(node));
+
+  if (sn->end <= sn->s)
+    return 0;
+
+  end = sn->end;
+  ambig = NSTRING_IS_AMBIG(node);
+
+  /* the first character opens the first run */
+  run_start = sn->s;
+  run_mb_len = enc_len(enc, run_start);
+  p = run_start + run_mb_len;
+  run_chars = 1;
+
+  while (p < end) {
+    cur_len = enc_len(enc, p);
+    if (cur_len != run_mb_len) {
+      /* width changed: flush the finished run, start a new one */
+      r = add_compile_string(run_start, run_mb_len, run_chars, reg, ambig);
+      if (r != 0) return r;
+
+      run_start = p;
+      run_chars = 1;
+      run_mb_len = cur_len;
+    }
+    else {
+      run_chars++;
+    }
+    p += cur_len;
+  }
+
+  return add_compile_string(run_start, run_mb_len, run_chars, reg, ambig);
+}
+
+/*
+ * Emit code for a raw string node: the bytes are emitted as one run of
+ * single-byte characters, case-sensitive.  An empty string emits nothing
+ * and returns 0.
+ */
+static int
+compile_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->s < sn->end)
+    return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
+  return 0;
+}
+
+/*
+ * Emit the multi-byte range data `mbuf` of a character class.
+ *
+ * With PLATFORM_UNALIGNED_WORD_ACCESS: a length field followed by the
+ * data.  Otherwise the data is preceded by `pad_size` filler bytes (as
+ * computed by GET_ALIGNMENT_PAD_SIZE for the address just past the length
+ * field) and followed by the remaining filler, so that exactly
+ * WORD_ALIGNMENT_SIZE - 1 extra bytes are always emitted.
+ *
+ * Returns 0 on success.  The previous version ignored the results of
+ * add_length() and of the padding appends; every append is now checked
+ * and the first non-zero result is returned.
+ */
+static int
+add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
+{
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  int r = add_length(reg, mbuf->used);
+  if (r) return r;
+  return add_bytes(reg, mbuf->p, mbuf->used);
+#else
+  static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+
+  int r, pad_size;
+  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
+
+  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
+  r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
+  if (r) return r;
+  if (pad_size != 0) {
+    r = add_bytes(reg, PadBuf, pad_size);
+    if (r) return r;
+  }
+
+  r = add_bytes(reg, mbuf->p, mbuf->used);
+  if (r) return r;
+
+  /* Trailing filler keeps the emitted size equal to the fixed value that
+     compile_length_cclass_node() reports. */
+  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
+  if (pad_size != 0)
+    r = add_bytes(reg, PadBuf, pad_size);
+  return r;
+#endif
+}
+
+/*
+ * Length pass for a character-class node; mirrors compile_cclass_node().
+ *
+ *  - shared class:             opcode + pointer
+ *  - no multi-byte data:       opcode + bit set
+ *  - multi-byte data present:  opcode, plus the bit set unless the
+ *    encoding's minimum character length exceeds 1 or the bit set is
+ *    empty, plus a length field and the multi-byte data (with
+ *    WORD_ALIGNMENT_SIZE - 1 padding bytes when unaligned access is not
+ *    available).
+ */
+static int
+compile_length_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int len;
+
+  if (IS_CCLASS_SHARE(cc))
+    return SIZE_OPCODE + SIZE_POINTER;
+
+  if (IS_NULL(cc->mbuf))
+    return SIZE_OPCODE + SIZE_BITSET;
+
+  len = SIZE_OPCODE;
+  if (!(ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)))
+    len += SIZE_BITSET;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  len += SIZE_LENGTH + cc->mbuf->used;
+#else
+  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
+#endif
+
+  return len;
+}
+
+/*
+ * Emit code for a character-class node.
+ *
+ *  - shared class:            OP_CCLASS_NODE + pointer to the node
+ *  - no multi-byte data:      OP_CCLASS[_NOT] + bit set
+ *  - multi-byte only (minimum character length > 1, or empty bit set):
+ *                             OP_CCLASS_MB[_NOT] + multi-byte data
+ *  - otherwise:               OP_CCLASS_MIX[_NOT] + bit set + multi-byte
+ *                             data
+ *
+ * Returns 0 on success.  The previous version ignored the result of
+ * every add_opcode() call; it is now checked like the other appends.
+ */
+static int
+compile_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int r;
+
+  if (IS_CCLASS_SHARE(cc)) {
+    r = add_opcode(reg, OP_CCLASS_NODE);
+    if (r) return r;
+    return add_pointer(reg, cc);
+  }
+
+  if (IS_NULL(cc->mbuf)) {
+    r = add_opcode(reg, IS_CCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
+    if (r) return r;
+    return add_bitset(reg, cc->bs);
+  }
+
+  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
+    r = add_opcode(reg, IS_CCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
+    if (r) return r;
+    return add_multi_byte_cclass(cc->mbuf, reg);
+  }
+
+  r = add_opcode(reg, IS_CCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
+  if (r) return r;
+  r = add_bitset(reg, cc->bs);
+  if (r) return r;
+  return add_multi_byte_cclass(cc->mbuf, reg);
+}
+
+/*
+ * Record the bounds of repeat number `id` in reg->repeat_range.
+ *
+ * The table is allocated on first use and grown by REPEAT_RANGE_ALLOC
+ * entries whenever `id` does not fit.  An infinite upper bound is stored
+ * as 0x7fffffff.  Returns 0, or ONIGERR_MEMORY when allocation fails.
+ * NOTE(review): the table grows by one step only, so this assumes ids
+ * arrive without large gaps -- confirm against callers.
+ */
+static int
+entry_repeat_range(regex_t* reg, int id, int lower, int upper)
+{
+#define REPEAT_RANGE_ALLOC 4
+
+  OnigRepeatRange* ranges = reg->repeat_range;
+
+  if (reg->repeat_range_alloc == 0) {
+    ranges = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+    CHECK_NULL_RETURN_VAL(ranges, ONIGERR_MEMORY);
+    reg->repeat_range = ranges;
+    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
+  }
+  else if (reg->repeat_range_alloc <= id) {
+    int grown = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
+
+    /* reg->repeat_range is only overwritten once the call succeeded */
+    ranges = (OnigRepeatRange* )xrealloc(reg->repeat_range,
+                                         sizeof(OnigRepeatRange) * grown);
+    CHECK_NULL_RETURN_VAL(ranges, ONIGERR_MEMORY);
+    reg->repeat_range = ranges;
+    reg->repeat_range_alloc = grown;
+  }
+
+  ranges[id].lower = lower;
+  if (IS_REPEAT_INFINITE(upper))
+    ranges[id].upper = 0x7fffffff;
+  else
+    ranges[id].upper = upper;
+  return 0;
+}
+
+/*
+ * Emit a counted repeat: OP_REPEAT[_NG] id reladdr, the (possibly
+ * null-checked) body, then OP_REPEAT_INC* id.
+ *
+ * `target_len` is the size of the body as it will be emitted; the
+ * relative address is that size plus SIZE_OP_REPEAT_INC.  The bounds are
+ * stored with entry_repeat_range() under a new id taken from
+ * reg->num_repeat.  The "_SG" increment opcodes are used when the
+ * quantifier is flagged as being inside another repeat or, with
+ * USE_SUBEXP_CALL, when the pattern contains calls.
+ */
+static int
+compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
+                          regex_t* reg)
+{
+  int r, inc_op;
+  int use_sg = 0;
+  int repeat_id = reg->num_repeat;
+
+  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
+  if (r != 0) return r;
+  r = add_mem_num(reg, repeat_id); /* OP_REPEAT ID */
+  reg->num_repeat++;               /* incremented even if the append failed */
+  if (r != 0) return r;
+  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
+  if (r != 0) return r;
+
+  r = entry_repeat_range(reg, repeat_id, qn->lower, qn->upper);
+  if (r != 0) return r;
+
+  r = compile_tree_empty_check(qn->target, reg, empty_info);
+  if (r != 0) return r;
+
+#ifdef USE_SUBEXP_CALL
+  if (reg->num_call > 0)
+    use_sg = 1;
+#endif
+  if (!use_sg && IS_QUANTIFIER_IN_REPEAT(qn))
+    use_sg = 1;
+
+  if (use_sg)
+    inc_op = qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG;
+  else
+    inc_op = qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG;
+
+  r = add_opcode(reg, inc_op);
+  if (r != 0) return r;
+  return add_mem_num(reg, repeat_id); /* OP_REPEAT ID */
+}
+
+/*
+ * Returns 1 when `qn` is a greedy quantifier with an infinite upper bound
+ * whose target is an N_ANYCHAR node, 0 otherwise.  The lower bound is not
+ * examined.
+ */
+static int
+is_anychar_star_quantifier(QuantifierNode* qn)
+{
+  return (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+          NTYPE(qn->target) == N_ANYCHAR) ? 1 : 0;
+}
+
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+/*
+ * Length pass for a quantifier node (build with
+ * USE_COMBINATION_EXPLOSION_CHECK).  Each case mirrors the matching case
+ * in compile_quantifier_node().
+ *
+ * Returns the number of bytes to be emitted, or the negative value
+ * returned by compile_length_tree() for the target.
+ */
+static int
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
+{
+  int len, body_len, cklen;
+  int ckn;  /* read by the CKN_ON macro */
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM : 0);
+
+  /* greedy, unbounded repeat of "any character" */
+  if (NTYPE(qn->target) == N_ANYCHAR && qn->greedy && infinite) {
+    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
+      return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+    return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+  }
+
+  /* body size including the null-check pair, when one is needed */
+  body_len = tlen;
+  if (empty_info != 0)
+    body_len += (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+
+  if (infinite && qn->lower <= 1) {
+    if (qn->greedy) {
+      len = (qn->lower == 1) ? SIZE_OP_JUMP : 0;
+      len += SIZE_OP_PUSH + cklen + body_len + SIZE_OP_JUMP;
+    }
+    else {
+      len = (qn->lower == 0) ? SIZE_OP_JUMP : 0;
+      len += body_len + SIZE_OP_PUSH + cklen;
+    }
+  }
+  else if (qn->upper == 0) {
+    if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+      len = SIZE_OP_JUMP + tlen;
+    else
+      len = 0;
+  }
+  else if (qn->upper == 1 && qn->greedy) {
+    if (qn->lower != 0)
+      len = tlen;
+    else if (CKN_ON)
+      len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+    else
+      len = SIZE_OP_PUSH + tlen;
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+  }
+  else {
+    /* counted repeat: OP_REPEAT id reladdr, body, OP_REPEAT_INC id */
+    len = SIZE_OP_REPEAT_INC
+        + body_len + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+    if (CKN_ON)
+      len += SIZE_OP_STATE_CHECK;
+  }
+
+  return len;
+}
+
+/*
+ * Emit code for a quantifier node (build with
+ * USE_COMBINATION_EXPLOSION_CHECK).
+ *
+ * Cases, in the order they are tested:
+ *   1. greedy unbounded "any char": `lower` copies of the target, then an
+ *      ANYCHAR_STAR family opcode;
+ *   2. unbounded with lower <= 1: a PUSH/JUMP loop around the target;
+ *   3. upper == 0: nothing, unless the group is referred to, in which
+ *      case the body is emitted behind a JUMP that skips it;
+ *   4. greedy with upper == 1: optional PUSH, then the target;
+ *   5. non-greedy '??': PUSH, JUMP, target;
+ *   6. everything else: a counted repeat via compile_range_repeat_node().
+ * When CKN_ON (the local `ckn` is positive) the state-check variants of
+ * the opcodes are used and the check number is emitted.
+ *
+ * Returns 0 on success, a negative value from compile_length_tree(), or
+ * the first non-zero result of an emit helper.
+ *
+ * Change from the previous version: the peek-next branch of case 1 held
+ * an `if (CKN_ON)` block that could never run, because the branch is
+ * entered only when CKN_ON is false.  It has been removed.
+ */
+static int
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
+{
+  int r, mod_tlen;
+  int ckn;  /* read by the CKN_ON macro */
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+  if (is_anychar_star_quantifier(qn)) {
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+      if (IS_MULTILINE(reg->options))
+        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      else
+        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+      if (r) return r;
+
+      /* operand: first byte of the string that follows */
+      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+    }
+    else {
+      if (IS_MULTILINE(reg->options)) {
+        r = add_opcode(reg, (CKN_ON ?
+                             OP_STATE_CHECK_ANYCHAR_ML_STAR
+                             : OP_ANYCHAR_ML_STAR));
+      }
+      else {
+        r = add_opcode(reg, (CKN_ON ?
+                             OP_STATE_CHECK_ANYCHAR_STAR
+                             : OP_ANYCHAR_STAR));
+      }
+      if (r) return r;
+      if (CKN_ON)
+        r = add_state_check_num(reg, ckn);
+
+      return r;
+    }
+  }
+
+  /* body size including the null-check pair, when one is needed */
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+  else
+    mod_tlen = tlen;
+
+  if (infinite && qn->lower <= 1) {
+    if (qn->greedy) {
+      if (qn->lower == 1) {
+        /* skip the PUSH so the body runs once unconditionally */
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+                        (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+        if (r) return r;
+      }
+
+      if (CKN_ON) {
+        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+        if (r) return r;
+        r = add_state_check_num(reg, ckn);
+        if (r) return r;
+        r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+      }
+      if (r) return r;
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      /* back to the PUSH */
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+              -(mod_tlen + (int )SIZE_OP_JUMP
+                + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+    }
+    else {
+      if (qn->lower == 0) {
+        /* skip the body on the first pass */
+        r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+        if (r) return r;
+      }
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      if (CKN_ON) {
+        r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+        if (r) return r;
+        r = add_state_check_num(reg, ckn);
+        if (r) return r;
+        r = add_rel_addr(reg,
+                 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+      }
+      else
+        r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+    }
+  }
+  else if (qn->upper == 0) {
+    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+    }
+    else
+      r = 0;
+  }
+  else if (qn->upper == 1 && qn->greedy) {
+    if (qn->lower == 0) {
+      if (CKN_ON) {
+        r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+        if (r) return r;
+        r = add_state_check_num(reg, ckn);
+        if (r) return r;
+        r = add_rel_addr(reg, tlen);
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+      }
+      if (r) return r;
+    }
+
+    r = compile_tree(qn->target, reg);
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    if (CKN_ON) {
+      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+      if (r) return r;
+      r = add_state_check_num(reg, ckn);
+      if (r) return r;
+      r = add_rel_addr(reg, SIZE_OP_JUMP);
+    }
+    else {
+      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+    }
+
+    if (r) return r;
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else {
+    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+    if (CKN_ON) {
+      if (r) return r;
+      r = add_opcode(reg, OP_STATE_CHECK);
+      if (r) return r;
+      r = add_state_check_num(reg, ckn);
+    }
+  }
+  return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+/*
+ * Length pass for a quantifier node (build without
+ * USE_COMBINATION_EXPLOSION_CHECK).  Each case mirrors the matching case
+ * in compile_quantifier_node().
+ *
+ * Returns the number of bytes to be emitted, or the negative value
+ * returned by compile_length_tree() for the target.
+ */
+static int
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
+{
+  int len, body_len;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  /* greedy, unbounded repeat of "any character" */
+  if (NTYPE(qn->target) == N_ANYCHAR && qn->greedy && infinite) {
+    if (IS_NOT_NULL(qn->next_head_exact))
+      return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+    return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+  }
+
+  /* body size including the null-check pair, when one is needed */
+  body_len = tlen;
+  if (empty_info != 0)
+    body_len += (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+
+  if (infinite &&
+      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    /* prefix: one JUMP for a large body with lower == 1, otherwise
+       `lower` expanded copies of the body */
+    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE)
+      len = SIZE_OP_JUMP;
+    else
+      len = tlen * qn->lower;
+
+    /* the loop itself */
+    if (!qn->greedy)
+      len += SIZE_OP_JUMP + body_len + SIZE_OP_PUSH;
+    else if (IS_NOT_NULL(qn->head_exact))
+      len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + body_len + SIZE_OP_JUMP;
+    else if (IS_NOT_NULL(qn->next_head_exact))
+      len += SIZE_OP_PUSH_IF_PEEK_NEXT + body_len + SIZE_OP_JUMP;
+    else
+      len += SIZE_OP_PUSH + body_len + SIZE_OP_JUMP;
+  }
+  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+    len = SIZE_OP_JUMP + tlen;
+  }
+  else if (!infinite && qn->greedy &&
+           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+                              <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    /* expanded: `lower` plain copies, then (upper - lower) PUSH+copy */
+    len = tlen * qn->lower;
+    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
+  }
+  else {
+    /* counted repeat: OP_REPEAT id reladdr, body, OP_REPEAT_INC id */
+    len = SIZE_OP_REPEAT_INC
+        + body_len + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+  }
+
+  return len;
+}
+
+/*
+ * Emit code for a quantifier node (build without
+ * USE_COMBINATION_EXPLOSION_CHECK).
+ *
+ * Cases, in the order they are tested:
+ *   1. greedy unbounded "any char": `lower` copies of the target, then an
+ *      ANYCHAR_STAR family opcode;
+ *   2. unbounded, with lower <= 1 or a small enough expansion: a prefix
+ *      (a JUMP into the loop body, or `lower` copies of the target)
+ *      followed by a PUSH/JUMP loop.  The greedy form uses
+ *      OP_PUSH_OR_JUMP_EXACT1 or OP_PUSH_IF_PEEK_NEXT, with a one-byte
+ *      operand, when head_exact or next_head_exact is set;
+ *   3. upper == 0 on a referred group: body behind a JUMP that skips it;
+ *   4. bounded greedy, small enough: fully expanded with PUSH opcodes;
+ *   5. non-greedy '??': PUSH, JUMP, target;
+ *   6. everything else: a counted repeat via compile_range_repeat_node().
+ *
+ * Returns 0 on success, a negative value from compile_length_tree(), or
+ * the first non-zero result of an emit helper.
+ *
+ * Change from the previous version: the results of the two add_bytes()
+ * calls that emit the one-byte operand in case 2 were ignored; they are
+ * now checked like every other append.
+ */
+static int
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
+{
+  int i, r, mod_tlen;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  if (is_anychar_star_quantifier(qn)) {
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+    if (IS_NOT_NULL(qn->next_head_exact)) {
+      if (IS_MULTILINE(reg->options))
+        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      else
+        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+      if (r) return r;
+      /* operand: first byte of the string that follows */
+      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+    }
+    else {
+      if (IS_MULTILINE(reg->options))
+        return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+      else
+        return add_opcode(reg, OP_ANYCHAR_STAR);
+    }
+  }
+
+  /* body size including the null-check pair, when one is needed */
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+  else
+    mod_tlen = tlen;
+
+  if (infinite &&
+      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+      /* large body: jump over the loop head rather than copy the body */
+      if (qn->greedy) {
+        if (IS_NOT_NULL(qn->head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
+        else if (IS_NOT_NULL(qn->next_head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+        else
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+      }
+      if (r) return r;
+    }
+    else {
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+    }
+
+    if (qn->greedy) {
+      if (IS_NOT_NULL(qn->head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = add_bytes(reg, NSTRING(qn->head_exact).s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+        -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+      }
+      else if (IS_NOT_NULL(qn->next_head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+                     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+      }
+    }
+    else {
+      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+      if (r) return r;
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+    }
+  }
+  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else if (!infinite && qn->greedy &&
+           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+                              <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    int n = qn->upper - qn->lower;
+
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+
+    /* each optional copy is guarded by a PUSH whose target is the end
+       of the whole expansion */
+    for (i = 0; i < n; i++) {
+      r = add_opcode_rel_addr(reg, OP_PUSH,
+                         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+    }
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+    if (r) return r;
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else {
+    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+  }
+  return r;
+}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+/*
+ * Length pass for an option effect node.
+ *
+ * The target is measured with reg->options temporarily set to the node's
+ * option value.  When the change involves a dynamic option, the size of
+ * the set-option scaffolding emitted by compile_option_node() is added.
+ * Returns a negative value unchanged if measuring the target fails.
+ */
+static int
+compile_length_option_node(EffectNode* node, regex_t* reg)
+{
+  OnigOptionType saved = reg->options;
+  int body_len;
+
+  reg->options = node->option;
+  body_len = compile_length_tree(node->target, reg);
+  reg->options = saved;
+
+  if (body_len < 0)
+    return body_len;
+
+  if (!IS_DYNAMIC_OPTION(saved ^ node->option))
+    return body_len;
+
+  return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
+         + body_len + SIZE_OP_SET_OPTION;
+}
+
+/*
+ * Emit code for an option effect node.
+ *
+ * When the option change involves a dynamic option the body is bracketed:
+ *   OP_SET_OPTION_PUSH new, OP_SET_OPTION old, OP_FAIL, body,
+ *   OP_SET_OPTION old.
+ * Otherwise only the body is emitted.  In both cases the body is compiled
+ * with reg->options set to the node's option value, and reg->options is
+ * restored afterwards.
+ */
+static int
+compile_option_node(EffectNode* node, regex_t* reg)
+{
+  int r;
+  OnigOptionType saved = reg->options;
+  int dynamic = IS_DYNAMIC_OPTION(saved ^ node->option);
+
+  if (dynamic) {
+    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
+    if (r != 0) return r;
+    r = add_opcode_option(reg, OP_SET_OPTION, saved);
+    if (r != 0) return r;
+    r = add_opcode(reg, OP_FAIL);
+    if (r != 0) return r;
+  }
+
+  reg->options = node->option;
+  r = compile_tree(node->target, reg);
+  reg->options = saved;
+
+  if (!dynamic || r != 0)
+    return r;
+
+  return add_opcode_option(reg, OP_SET_OPTION, saved);
+}
+
+/*
+ * Length pass for an effect node (option, capture group, or
+ * stop-backtrack group); mirrors compile_effect_node().
+ *
+ * Returns the byte count, a negative value from measuring a child, or
+ * ONIGERR_TYPE_BUG for an unknown effect type.
+ */
+static int
+compile_length_effect_node(EffectNode* node, regex_t* reg)
+{
+  int len;
+  int tlen = 0;
+
+  if (node->type == EFFECT_OPTION)
+    return compile_length_option_node(node, reg);
+
+  if (node->target) {
+    tlen = compile_length_tree(node->target, reg);
+    if (tlen < 0) return tlen;
+  }
+
+  if (node->type == EFFECT_MEMORY) {
+#ifdef USE_SUBEXP_CALL
+    if (IS_EFFECT_CALLED(node)) {
+      /* called group: CALL + JUMP in front, RETURN behind */
+      len = SIZE_OP_MEMORY_START_PUSH + tlen
+          + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+      return len;
+    }
+#endif
+    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+      len = SIZE_OP_MEMORY_START_PUSH;
+    else
+      len = SIZE_OP_MEMORY_START;
+
+    len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+                   ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+    return len;
+  }
+
+  if (node->type == EFFECT_STOP_BACKTRACK) {
+    if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
+      /* measured from the quantifier's own target, not the quantifier */
+      QuantifierNode* qn = &NQUANTIFIER(node->target);
+      tlen = compile_length_tree(qn->target, reg);
+      if (tlen < 0) return tlen;
+
+      len = tlen * qn->lower
+          + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+    }
+    else {
+      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+    }
+    return len;
+  }
+
+  return ONIGERR_TYPE_BUG;
+}
+
+static int get_char_length_tree(Node* node, regex_t* reg, int* len);
+
+/*
+ * Emit code for an effect node.
+ *
+ * EFFECT_OPTION is delegated to compile_option_node().
+ *
+ * EFFECT_MEMORY emits MEMORY_START[_PUSH] regnum, the body, and a
+ * MEMORY_END variant with regnum.  With USE_SUBEXP_CALL, a called group
+ * is additionally preceded by OP_CALL (to the group's own entry, recorded
+ * in node->call_addr) and an OP_JUMP over the group, and is followed by
+ * OP_RETURN.
+ *
+ * EFFECT_STOP_BACKTRACK emits PUSH_STOP_BT body POP_STOP_BT, or for the
+ * "simple repeat" form: `lower` copies of the quantifier target followed
+ * by a PUSH / target / POP / JUMP loop.
+ *
+ * Returns 0 on success, ONIGERR_TYPE_BUG for an unknown type, or the
+ * first error met.
+ *
+ * Change from the previous version: in the called-group path the result
+ * of compile_length_tree() was used in the jump offset without checking
+ * for a negative value; it is now returned as an error.
+ */
+static int
+compile_effect_node(EffectNode* node, regex_t* reg)
+{
+  int r, len;
+
+  if (node->type == EFFECT_OPTION)
+    return compile_option_node(node, reg);
+
+  switch (node->type) {
+  case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+    if (IS_EFFECT_CALLED(node)) {
+      r = add_opcode(reg, OP_CALL);
+      if (r) return r;
+      /* entry point = just past this CALL's operand and the JUMP below */
+      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+      node->state |= NST_ADDR_FIXED;
+      r = add_abs_addr(reg, (int )node->call_addr);
+      if (r) return r;
+      len = compile_length_tree(node->target, reg);
+      if (len < 0) return len;
+      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_EFFECT_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+
+      r = add_opcode_rel_addr(reg, OP_JUMP, len);
+      if (r) return r;
+    }
+#endif
+    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+      r = add_opcode(reg, OP_MEMORY_START_PUSH);
+    else
+      r = add_opcode(reg, OP_MEMORY_START);
+    if (r) return r;
+    r = add_mem_num(reg, node->regnum);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+#ifdef USE_SUBEXP_CALL
+    if (IS_EFFECT_CALLED(node)) {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+      else
+        r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+                             ? OP_MEMORY_END_REC : OP_MEMORY_END));
+
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+      if (r) return r;
+      r = add_opcode(reg, OP_RETURN);
+    }
+    else
+#endif
+    {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, OP_MEMORY_END_PUSH);
+      else
+        r = add_opcode(reg, OP_MEMORY_END);
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+    }
+    break;
+
+  case EFFECT_STOP_BACKTRACK:
+    if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
+      QuantifierNode* qn = &NQUANTIFIER(node->target);
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+
+      len = compile_length_tree(qn->target, reg);
+      if (len < 0) return len;
+
+      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP);
+      if (r) return r;
+      /* back to the PUSH */
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+         -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+    }
+    else {
+      r = add_opcode(reg, OP_PUSH_STOP_BT);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP_STOP_BT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Length pass for an anchor node; mirrors compile_anchor_node().
+ *
+ * Look-ahead and look-behind anchors are sized as their bracket opcodes
+ * plus the target; every other anchor type is sized as a single opcode.
+ * Returns a negative value unchanged if measuring the target fails.
+ */
+static int
+compile_length_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int tlen = 0;
+
+  if (node->target) {
+    tlen = compile_length_tree(node->target, reg);
+    if (tlen < 0) return tlen;
+  }
+
+  switch (node->type) {
+  case ANCHOR_PREC_READ:
+    return SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
+
+  case ANCHOR_PREC_READ_NOT:
+    return SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
+
+  case ANCHOR_LOOK_BEHIND:
+    return SIZE_OP_LOOK_BEHIND + tlen;
+
+  case ANCHOR_LOOK_BEHIND_NOT:
+    return SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
+
+  default:
+    break;
+  }
+
+  return SIZE_OPCODE;
+}
+
+/*
+ * Emit code for an anchor node.
+ *
+ * Simple anchors emit one opcode.  Look-ahead emits PUSH_POS body POP_POS
+ * (positive) or PUSH_POS_NOT reladdr body FAIL_POS (negative).
+ * Look-behind emits its opcode, the character length to step back
+ * (node->char_len, or computed with get_char_length_tree() when that is
+ * negative) and the body; the negative form is bracketed by
+ * PUSH_LOOK_BEHIND_NOT reladdr ... FAIL_LOOK_BEHIND_NOT.
+ *
+ * Returns 0 on success, ONIGERR_INVALID_LOOK_BEHIND_PATTERN when the
+ * look-behind length cannot be computed, ONIGERR_TYPE_BUG for an unknown
+ * anchor type, or the first error met.
+ *
+ * Change from the previous version: ANCHOR_LOOK_BEHIND_NOT did not check
+ * the result of compile_length_tree() for a negative value before using
+ * it as a jump offset; it now does, as ANCHOR_PREC_READ_NOT already did.
+ */
+static int
+compile_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int r, len;
+
+  switch (node->type) {
+  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
+  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
+  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
+  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
+  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
+  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
+
+  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
+  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
+#ifdef USE_WORD_BEGIN_END
+  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
+  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
+#endif
+
+  case ANCHOR_PREC_READ:
+    r = add_opcode(reg, OP_PUSH_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_POP_POS);
+    break;
+
+  case ANCHOR_PREC_READ_NOT:
+    len = compile_length_tree(node->target, reg);
+    if (len < 0) return len;
+    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_FAIL_POS);
+    break;
+
+  case ANCHOR_LOOK_BEHIND:
+    {
+      int n;
+      r = add_opcode(reg, OP_LOOK_BEHIND);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+    }
+    break;
+
+  case ANCHOR_LOOK_BEHIND_NOT:
+    {
+      int n;
+      len = compile_length_tree(node->target, reg);
+      if (len < 0) return len;
+      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
+                              len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Length pass: number of bytes compile_tree() will emit for `node`.
+ *
+ * Returns the byte count, ONIGERR_TYPE_BUG for an unknown node type, or a
+ * negative value propagated from a child.
+ *
+ * Change from the previous version: N_ALT added the result of every
+ * child straight into the total, so a negative error value from one
+ * alternative was mixed into the length instead of being reported.  It
+ * is now checked the same way N_LIST already checked it.
+ */
+static int
+compile_length_tree(Node* node, regex_t* reg)
+{
+  int len, type, r;
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    len = 0;
+    do {
+      r = compile_length_tree(NCONS(node).left, reg);
+      if (r < 0) return r;
+      len += r;
+    } while (IS_NOT_NULL(node = NCONS(node).right));
+    r = len;
+    break;
+
+  case N_ALT:
+    {
+      int n = 0;
+
+      len = 0;
+      do {
+        r = compile_length_tree(NCONS(node).left, reg);
+        if (r < 0) return r;
+        len += r;
+        n++;
+      } while (IS_NOT_NULL(node = NCONS(node).right));
+      /* every alternative but the last is wrapped in PUSH ... JUMP */
+      r = len + (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
+    }
+    break;
+
+  case N_STRING:
+    if (NSTRING_IS_RAW(node))
+      r = compile_length_string_raw_node(&(NSTRING(node)), reg);
+    else
+      r = compile_length_string_node(node, reg);
+    break;
+
+  case N_CCLASS:
+    r = compile_length_cclass_node(&(NCCLASS(node)), reg);
+    break;
+
+  case N_CTYPE:
+  case N_ANYCHAR:
+    r = SIZE_OPCODE;
+    break;
+
+  case N_BACKREF:
+    {
+      BackrefNode* br = &(NBACKREF(node));
+
+#ifdef USE_BACKREF_AT_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+      else
+#endif
+      if (br->back_num == 1) {
+        /* \1 and \2 have operand-less opcodes when case-sensitive */
+        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
+             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+      }
+      else {
+        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    r = SIZE_OP_CALL;
+    break;
+#endif
+
+  case N_QUANTIFIER:
+    r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
+    break;
+
+  case N_EFFECT:
+    r = compile_length_effect_node(&NEFFECT(node), reg);
+    break;
+
+  case N_ANCHOR:
+    r = compile_length_anchor_node(&(NANCHOR(node)), reg);
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Emit code for `node` and, recursively, its children.
+ *
+ * Returns 0 on success or the first error met.
+ * NOTE(review): an unknown node type is only reported on stderr under
+ * ONIG_DEBUG and otherwise yields 0, whereas compile_length_tree()
+ * returns ONIGERR_TYPE_BUG for the same input.  Left unchanged here.
+ *
+ * Changes from the previous version:
+ *  - N_ALT used the results of compile_length_tree() to compute jump
+ *    targets without checking for negative (error) values; they are now
+ *    checked and propagated.
+ *  - the local label `add_bacref_mems` is renamed `add_backref_mems`.
+ */
+static int
+compile_tree(Node* node, regex_t* reg)
+{
+  int n, type, len, pos, r = 0;
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    do {
+      r = compile_tree(NCONS(node).left, reg);
+    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    break;
+
+  case N_ALT:
+    {
+      Node* x = node;
+      int alt_len;
+
+      /* first pass: total size, to find where every alternative ends up */
+      len = 0;
+      do {
+        alt_len = compile_length_tree(NCONS(x).left, reg);
+        if (alt_len < 0) return alt_len;
+        len += alt_len;
+        if (NCONS(x).right != NULL) {
+          len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+        }
+      } while (IS_NOT_NULL(x = NCONS(x).right));
+      pos = reg->used + len;  /* goal position */
+
+      /* second pass: PUSH next-alt, body, JUMP goal (all but the last) */
+      do {
+        len = compile_length_tree(NCONS(node).left, reg);
+        if (len < 0) {
+          r = len;
+          break;
+        }
+        if (IS_NOT_NULL(NCONS(node).right)) {
+          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+          if (r) break;
+        }
+        r = compile_tree(NCONS(node).left, reg);
+        if (r) break;
+        if (IS_NOT_NULL(NCONS(node).right)) {
+          len = pos - (reg->used + SIZE_OP_JUMP);
+          r = add_opcode_rel_addr(reg, OP_JUMP, len);
+          if (r) break;
+        }
+      } while (IS_NOT_NULL(node = NCONS(node).right));
+    }
+    break;
+
+  case N_STRING:
+    if (NSTRING_IS_RAW(node))
+      r = compile_string_raw_node(&(NSTRING(node)), reg);
+    else
+      r = compile_string_node(node, reg);
+    break;
+
+  case N_CCLASS:
+    r = compile_cclass_node(&(NCCLASS(node)), reg);
+    break;
+
+  case N_CTYPE:
+    {
+      int op;
+
+      switch (NCTYPE(node).type) {
+      case CTYPE_WORD:     op = OP_WORD;     break;
+      case CTYPE_NOT_WORD: op = OP_NOT_WORD; break;
+      default:
+        return ONIGERR_TYPE_BUG;
+        break;
+      }
+      r = add_opcode(reg, op);
+    }
+    break;
+
+  case N_ANYCHAR:
+    if (IS_MULTILINE(reg->options))
+      r = add_opcode(reg, OP_ANYCHAR_ML);
+    else
+      r = add_opcode(reg, OP_ANYCHAR);
+    break;
+
+  case N_BACKREF:
+    {
+      BackrefNode* br = &(NBACKREF(node));
+
+#ifdef USE_BACKREF_AT_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+        r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+        if (r) return r;
+        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+        if (r) return r;
+        r = add_length(reg, br->nest_level);
+        if (r) return r;
+
+        goto add_backref_mems;
+      }
+      else
+#endif
+      if (br->back_num == 1) {
+        n = br->back_static[0];
+        if (IS_IGNORECASE(reg->options)) {
+          r = add_opcode(reg, OP_BACKREFN_IC);
+          if (r) return r;
+          r = add_mem_num(reg, n);
+        }
+        else {
+          switch (n) {
+          case 1:  r = add_opcode(reg, OP_BACKREF1); break;
+          case 2:  r = add_opcode(reg, OP_BACKREF2); break;
+          default:
+            r = add_opcode(reg, OP_BACKREFN);
+            if (r) return r;
+            r = add_mem_num(reg, n);
+            break;
+          }
+        }
+      }
+      else {
+        int i;
+        int* p;
+
+        if (IS_IGNORECASE(reg->options)) {
+          r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+        }
+        else {
+          r = add_opcode(reg, OP_BACKREF_MULTI);
+        }
+        if (r) return r;
+
+#ifdef USE_BACKREF_AT_LEVEL
+      add_backref_mems:
+#endif
+        /* count, then the group numbers from last to first */
+        r = add_length(reg, br->back_num);
+        if (r) return r;
+        p = BACKREFS_P(br);
+        for (i = br->back_num - 1; i >= 0; i--) {
+          r = add_mem_num(reg, p[i]);
+          if (r) return r;
+        }
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    r = compile_call(&(NCALL(node)), reg);
+    break;
+#endif
+
+  case N_QUANTIFIER:
+    r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
+    break;
+
+  case N_EFFECT:
+    r = compile_effect_node(&NEFFECT(node), reg);
+    break;
+
+  case N_ANCHOR:
+    r = compile_anchor_node(&(NANCHOR(node)), reg);
+    break;
+
+  default:
+#ifdef ONIG_DEBUG
+    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
+#endif
+    break;
+  }
+
+  return r;
+}
+
+#ifdef USE_NAMED_GROUP
+
+ /*
+  * Walk the tree at *plink, removing unnamed capture groups and giving the
+  * named ones new consecutive numbers.
+  *
+  *   plink   - address of the link holding the current node; rewritten when
+  *             an unnamed group is replaced by its own target.
+  *   map     - indexed by old group number; new_val receives the new number
+  *             of every named group that is kept.
+  *   counter - running count of named groups seen so far.
+  *
+  * Targets of anchor nodes (look-around sub-patterns) are visited as well,
+  * so that groups nested inside them are treated like any other group.
+  *
+  * Returns 0, or the first non-zero code from a recursive call.
+  */
+ static int
+ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
+ {
+   int r = 0;
+   Node* node = *plink;
+
+   switch (NTYPE(node)) {
+   case N_LIST:
+   case N_ALT:
+     do {
+       r = noname_disable_map(&(NCONS(node).left), map, counter);
+     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+     break;
+
+   case N_QUANTIFIER:
+     {
+       Node** ptarget = &(NQUANTIFIER(node).target);
+       Node* old = *ptarget;
+       r = noname_disable_map(ptarget, map, counter);
+       /* removing a group may have exposed a directly nested quantifier */
+       if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
+         onig_reduce_nested_quantifier(node, *ptarget);
+       }
+     }
+     break;
+
+   case N_EFFECT:
+     {
+       EffectNode* en = &(NEFFECT(node));
+       if (en->type == EFFECT_MEMORY) {
+         if (IS_EFFECT_NAMED_GROUP(en)) {
+           (*counter)++;
+           map[en->regnum].new_val = *counter;
+           en->regnum = *counter;
+           r = noname_disable_map(&(en->target), map, counter);
+         }
+         else {
+           /* splice the target into the parent link and drop the group */
+           *plink = en->target;
+           en->target = NULL_NODE;
+           onig_node_free(node);
+           r = noname_disable_map(plink, map, counter);
+         }
+       }
+       else
+         r = noname_disable_map(&(en->target), map, counter);
+     }
+     break;
+
+   case N_ANCHOR:
+     /* look-around anchors carry a sub-pattern that may contain groups */
+     if (IS_NOT_NULL(NANCHOR(node).target))
+       r = noname_disable_map(&(NANCHOR(node).target), map, counter);
+     break;
+
+   default:
+     break;
+   }
+
+   return r;
+ }
+
+ /*
+  * Rewrite the group numbers stored in a back-reference node through `map`.
+  *
+  * Entries whose mapped value is not positive are dropped and the surviving
+  * entries are compacted to the front of the same array; back_num is updated
+  * to the number of entries kept.
+  *
+  * Returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED when the node is not
+  * a by-name reference, 0 otherwise.
+  */
+ static int
+ renumber_node_backref(Node* node, GroupNumRemap* map)
+ {
+   BackrefNode* bn = &(NBACKREF(node));
+   int* backs;
+   int total, src, kept;
+
+   if (! IS_BACKREF_NAME_REF(bn))
+     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+
+   total = bn->back_num;
+   backs = IS_NULL(bn->back_dynamic) ? bn->back_static : bn->back_dynamic;
+
+   kept = 0;
+   for (src = 0; src < total; src++) {
+     int mapped = map[backs[src]].new_val;
+     if (mapped <= 0)
+       continue;
+     backs[kept] = mapped;
+     kept++;
+   }
+
+   bn->back_num = kept;
+   return 0;
+ }
+
+ /*
+  * Apply `map` to every back-reference node reachable from `node`.
+  *
+  * Recurses through lists, alternations, quantifier targets, effect targets
+  * and the targets of anchor nodes (look-around sub-patterns), so that
+  * back-references nested inside look-around are renumbered too.
+  *
+  * Returns 0, or the first non-zero code from renumber_node_backref().
+  */
+ static int
+ renumber_by_map(Node* node, GroupNumRemap* map)
+ {
+   int r = 0;
+
+   switch (NTYPE(node)) {
+   case N_LIST:
+   case N_ALT:
+     do {
+       r = renumber_by_map(NCONS(node).left, map);
+     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+     break;
+   case N_QUANTIFIER:
+     r = renumber_by_map(NQUANTIFIER(node).target, map);
+     break;
+   case N_EFFECT:
+     r = renumber_by_map(NEFFECT(node).target, map);
+     break;
+
+   case N_ANCHOR:
+     /* only anchors that carry a sub-pattern have a target to visit */
+     if (IS_NOT_NULL(NANCHOR(node).target))
+       r = renumber_by_map(NANCHOR(node).target, map);
+     break;
+
+   case N_BACKREF:
+     r = renumber_node_backref(node, map);
+     break;
+
+   default:
+     break;
+   }
+
+   return r;
+ }
+
+ /*
+  * Verify that every back-reference reachable from `node` is a by-name
+  * reference.
+  *
+  * Recurses through lists, alternations, quantifier targets, effect targets
+  * and the targets of anchor nodes (look-around sub-patterns).
+  *
+  * Returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED for the first
+  * back-reference that is not a name reference, 0 if none is found.
+  */
+ static int
+ numbered_ref_check(Node* node)
+ {
+   int r = 0;
+
+   switch (NTYPE(node)) {
+   case N_LIST:
+   case N_ALT:
+     do {
+       r = numbered_ref_check(NCONS(node).left);
+     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+     break;
+   case N_QUANTIFIER:
+     r = numbered_ref_check(NQUANTIFIER(node).target);
+     break;
+   case N_EFFECT:
+     r = numbered_ref_check(NEFFECT(node).target);
+     break;
+
+   case N_ANCHOR:
+     /* only anchors that carry a sub-pattern have a target to visit */
+     if (IS_NOT_NULL(NANCHOR(node).target))
+       r = numbered_ref_check(NANCHOR(node).target);
+     break;
+
+   case N_BACKREF:
+     if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
+       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+     break;
+
+   default:
+     break;
+   }
+
+   return r;
+ }
+
+ /*
+  * Turn unnamed groups into non-capturing ones and renumber the named
+  * groups that remain.
+  *
+  * Builds an old-number -> new-number table, rewrites the tree and its
+  * back-references with it, compacts the memory-node table of `env`,
+  * rebuilds the capture-history bit set, sets num_mem of both `env` and
+  * `reg` to env->num_named, and finally renumbers the name table.
+  *
+  * Returns ONIGERR_MEMORY if the table cannot be obtained, the first
+  * non-zero code from the tree passes, or the result of
+  * onig_renumber_name_table().
+  */
+ static int
+ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
+ {
+   int status, idx, next_slot, named_count;
+   BitStatusType old_history;
+   GroupNumRemap* map;
+
+   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
+   CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
+   for (idx = 1; idx <= env->num_mem; idx++)
+     map[idx].new_val = 0;
+
+   named_count = 0;
+   status = noname_disable_map(root, map, &named_count);
+   if (status != 0) return status;
+
+   status = renumber_by_map(*root, map);
+   if (status != 0) return status;
+
+   /* slide the memory nodes of surviving groups down to their new slots */
+   next_slot = 1;
+   for (idx = 1; idx <= env->num_mem; idx++) {
+     if (map[idx].new_val <= 0)
+       continue;
+     SCANENV_MEM_NODES(env)[next_slot] = SCANENV_MEM_NODES(env)[idx];
+     next_slot++;
+   }
+
+   /* re-express the capture-history bits in terms of the new numbers */
+   old_history = env->capture_history;
+   BIT_STATUS_CLEAR(env->capture_history);
+   for (idx = 1; idx <= ONIG_MAX_CAPTURE_HISTORY_GROUP; idx++) {
+     if (BIT_STATUS_AT(old_history, idx)) {
+       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[idx].new_val);
+     }
+   }
+
+   env->num_mem = env->num_named;
+   reg->num_mem = env->num_named;
+
+   return onig_renumber_name_table(reg, map);
+ }
+#endif /* USE_NAMED_GROUP */
+
+#ifdef USE_SUBEXP_CALL
+ /*
+  * Patch the recorded call sites in the compiled buffer of `reg` with the
+  * call address of their target group.
+  *
+  * For every entry of `uslist`, the target's call_addr is written at the
+  * entry's offset. Returns ONIGERR_PARSER_BUG as soon as a target without
+  * a fixed address is met, 0 when every entry has been written.
+  */
+ static int
+ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
+ {
+   int idx;
+
+   for (idx = 0; idx < uslist->num; idx++) {
+     EffectNode* target = &(NEFFECT(uslist->us[idx].target));
+     AbsAddrType addr;
+     int offset;
+
+     if (! IS_EFFECT_ADDR_FIXED(target))
+       return ONIGERR_PARSER_BUG;
+
+     addr = target->call_addr;
+     offset = uslist->us[idx].offset;
+     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
+   }
+
+   return 0;
+ }
+#endif
+
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ /*
+  * Classify the target of a quantifier by what it contains.
+  *
+  * Returns NQ_TARGET_IS_EMPTY_MEM as soon as a memory (capturing) effect is
+  * found, NQ_TARGET_IS_EMPTY_REC for a recursive call, the largest value
+  * seen among the members of a list/alternation, and 0 when none of the
+  * visited nodes contributes anything.
+  */
+ static int
+ quantifiers_memory_node_info(Node* node)
+ {
+   int r = 0;
+
+   switch (NTYPE(node)) {
+   case N_LIST:
+   case N_ALT:
+     for (;;) {
+       int sub = quantifiers_memory_node_info(NCONS(node).left);
+       if (sub > r) r = sub;
+       if (sub < 0) break;
+       node = NCONS(node).right;
+       if (IS_NULL(node)) break;
+     }
+     break;
+
+#ifdef USE_SUBEXP_CALL
+   case N_CALL:
+     if (IS_CALL_RECURSION(&NCALL(node)))
+       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
+     r = quantifiers_memory_node_info(NCALL(node).target);
+     break;
+#endif
+
+   case N_QUANTIFIER:
+     /* a target that can never be entered (upper == 0) is ignored */
+     if (NQUANTIFIER(node).upper != 0)
+       r = quantifiers_memory_node_info(NQUANTIFIER(node).target);
+     break;
+
+   case N_EFFECT:
+     {
+       EffectNode* effect = &(NEFFECT(node));
+
+       if (effect->type == EFFECT_MEMORY)
+         return NQ_TARGET_IS_EMPTY_MEM;
+       if (effect->type == EFFECT_OPTION ||
+           effect->type == EFFECT_STOP_BACKTRACK)
+         r = quantifiers_memory_node_info(effect->target);
+     }
+     break;
+
+   case N_BACKREF:
+   case N_STRING:
+   case N_CTYPE:
+   case N_CCLASS:
+   case N_ANYCHAR:
+   case N_ANCHOR:
+   default:
+     break;
+   }
+
+   return r;
+ }
+#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+
+ /*
+  * Compute the minimum match length of the pattern rooted at `node` and
+  * store it in *min (which is reset to 0 first).
+  *
+  *   list        - sum of the members' minimums
+  *   alternation - smallest minimum among the branches
+  *   string      - sn->end - sn->s
+  *   ctype / cclass / anychar - 1
+  *   quantifier  - target minimum multiplied by `lower` (0 when lower is 0)
+  *   backref     - smallest minimum among the referenced groups
+  *   anchor and unknown types - 0
+  *
+  * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference
+  * names a group number above env->num_mem, or a non-zero code from a
+  * recursive call.
+  */
+ static int
+ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
+ {
+ OnigDistance tmin;
+ int r = 0;
+
+ *min = 0;
+ switch (NTYPE(node)) {
+ case N_BACKREF:
+ {
+ int i;
+ int* backs;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BackrefNode* br = &(NBACKREF(node));
+ if (br->state & NST_RECURSION) break; /* leaves *min at 0 */
+
+ backs = BACKREFS_P(br);
+ if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[0]], min, env);
+ if (r != 0) break;
+ for (i = 1; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env);
+ if (r != 0) break;
+ if (*min > tmin) *min = tmin;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&NCALL(node))) {
+ /* recursive call: only use a minimum that was already computed */
+ EffectNode* en = &(NEFFECT(NCALL(node).target));
+ if (IS_EFFECT_MIN_FIXED(en))
+ *min = en->min_len;
+ }
+ else
+ r = get_min_match_length(NCALL(node).target, min, env);
+ break;
+#endif
+
+ case N_LIST:
+ do {
+ r = get_min_match_length(NCONS(node).left, &tmin, env);
+ if (r == 0) *min += tmin;
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ {
+ Node *x, *y;
+ y = node;
+ do {
+ x = NCONS(y).left;
+ r = get_min_match_length(x, &tmin, env);
+ if (r != 0) break;
+ if (y == node) *min = tmin; /* first branch seeds the minimum */
+ else if (*min > tmin) *min = tmin;
+ } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
+ }
+ break;
+
+ case N_STRING:
+ {
+ StrNode* sn = &(NSTRING(node));
+ *min = sn->end - sn->s;
+ }
+ break;
+
+ case N_CTYPE:
+ switch (NCTYPE(node).type) {
+ case CTYPE_WORD: *min = 1; break;
+ case CTYPE_NOT_WORD: *min = 1; break;
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ case N_ANYCHAR:
+ *min = 1;
+ break;
+
+ case N_QUANTIFIER:
+ {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+
+ if (qn->lower > 0) {
+ r = get_min_match_length(qn->target, min, env);
+ if (r == 0)
+ *min = distance_multiply(*min, qn->lower);
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* cache the result on the group node so later queries reuse it */
+ if (IS_EFFECT_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ r = get_min_match_length(en->target, min, env);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_EFFECT_STATUS(node, NST_MIN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = get_min_match_length(en->target, min, env);
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Compute the maximum match length of the pattern rooted at `node` and
+  * store it in *max (which is reset to 0 first).
+  *
+  *   list        - distance_add() of the members' maximums
+  *   alternation - largest maximum among the branches
+  *   string      - sn->end - sn->s
+  *   ctype / cclass / anychar - ONIGENC_MBC_MAXLEN_DIST(env->enc)
+  *   quantifier  - target maximum times `upper`, or ONIG_INFINITE_DISTANCE
+  *                 for an infinite repeat of a non-empty target
+  *   backref     - largest maximum among the referenced groups
+  *   recursive backref / call - ONIG_INFINITE_DISTANCE
+  *   anchor and unknown types - 0
+  *
+  * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference
+  * names a group number above env->num_mem, or a non-zero code from a
+  * recursive call.
+  */
+ static int
+ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
+ {
+ OnigDistance tmax;
+ int r = 0;
+
+ *max = 0;
+ switch (NTYPE(node)) {
+ case N_LIST:
+ do {
+ r = get_max_match_length(NCONS(node).left, &tmax, env);
+ if (r == 0)
+ *max = distance_add(*max, tmax);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ do {
+ r = get_max_match_length(NCONS(node).left, &tmax, env);
+ if (r == 0 && *max < tmax) *max = tmax;
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_STRING:
+ {
+ StrNode* sn = &(NSTRING(node));
+ *max = sn->end - sn->s;
+ }
+ break;
+
+ case N_CTYPE:
+ switch (NCTYPE(node).type) {
+ case CTYPE_WORD:
+ case CTYPE_NOT_WORD:
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ case N_ANYCHAR:
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ break;
+
+ case N_BACKREF:
+ {
+ int i;
+ int* backs;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BackrefNode* br = &(NBACKREF(node));
+ if (br->state & NST_RECURSION) {
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
+ }
+ backs = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env);
+ if (r != 0) break;
+ if (*max < tmax) *max = tmax;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (! IS_CALL_RECURSION(&(NCALL(node))))
+ r = get_max_match_length(NCALL(node).target, max, env);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
+#endif
+
+ case N_QUANTIFIER:
+ {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+
+ if (qn->upper != 0) {
+ r = get_max_match_length(qn->target, max, env);
+ /* a target whose maximum is 0 stays 0 however often it repeats */
+ if (r == 0 && *max != 0) {
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ *max = distance_multiply(*max, qn->upper);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ }
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* cache the result on the group node so later queries reuse it */
+ if (IS_EFFECT_MAX_FIXED(en))
+ *max = en->max_len;
+ else {
+ r = get_max_match_length(en->target, max, env);
+ if (r == 0) {
+ en->max_len = *max;
+ SET_EFFECT_STATUS(node, NST_MAX_FIXED);
+ }
+ }
+ break;
+#endif
+ /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = get_max_match_length(en->target, max, env);
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+#define GET_CHAR_LEN_VARLEN -1
+#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
+
+ /*
+  * Compute the length, counted in characters, of a fixed-size pattern.
+  * (fixed size pattern node only)
+  *
+  *   node  - pattern to measure
+  *   reg   - supplies the encoding used to step through string nodes
+  *   len   - receives the character count (reset to 0 first)
+  *   level - nesting depth of the caller; incremented on entry, so the
+  *           node passed by get_char_length_tree() is seen at level 1
+  *
+  * Returns 0 when the length is fixed, GET_CHAR_LEN_TOP_ALT_VARLEN when the
+  * node examined at level 1 is an alternation whose branches differ in
+  * length, and GET_CHAR_LEN_VARLEN for any other variable-length construct
+  * or unsupported node type.
+  */
+ static int
+ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
+ {
+ int tlen;
+ int r = 0;
+
+ level++;
+ *len = 0;
+ switch (NTYPE(node)) {
+ case N_LIST:
+ do {
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+ if (r == 0)
+ *len = distance_add(*len, tlen);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ {
+ int tlen2;
+ int varlen = 0;
+
+ /* every branch is compared against the length of the first one */
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+ while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
+ r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
+ if (r == 0) {
+ if (tlen != tlen2)
+ varlen = 1;
+ }
+ }
+ if (r == 0) {
+ if (varlen != 0) {
+ if (level == 1)
+ r = GET_CHAR_LEN_TOP_ALT_VARLEN;
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else
+ *len = tlen;
+ }
+ }
+ break;
+
+ case N_STRING:
+ {
+ StrNode* sn = &(NSTRING(node));
+ UChar *s = sn->s;
+ /* advance one encoded character at a time and count the steps */
+ while (s < sn->end) {
+ s += enc_len(reg->enc, s);
+ (*len)++;
+ }
+ }
+ break;
+
+ case N_QUANTIFIER:
+ {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+ /* only an exact repeat count {n,n} has a fixed length */
+ if (qn->lower == qn->upper) {
+ r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ if (r == 0)
+ *len = distance_multiply(tlen, qn->lower);
+ }
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (! IS_CALL_RECURSION(&(NCALL(node))))
+ r = get_char_length_tree1(NCALL(node).target, reg, len, level);
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+#endif
+
+ case N_CTYPE:
+ switch (NCTYPE(node).type) {
+ case CTYPE_WORD:
+ case CTYPE_NOT_WORD:
+ *len = 1;
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ case N_ANYCHAR:
+ *len = 1;
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* cache the result on the group node so later queries reuse it */
+ if (IS_EFFECT_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to here */
+ case EFFECT_OPTION:
+ case EFFECT_STOP_BACKTRACK:
+ r = get_char_length_tree1(en->target, reg, len, level);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ break;
+
+ default:
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Entry point for the fixed character-length computation: measures `node`
+  * starting from nesting level 0 and forwards the helper's result as-is.
+  */
+ static int
+ get_char_length_tree(Node* node, regex_t* reg, int* len)
+ {
+   const int top_level = 0;
+
+   return get_char_length_tree1(node, reg, len, top_level);
+ }
+
+ /*
+  * x is not included y ==> 1 : 0
+  *
+  * Decide whether the value nodes `x` and `y` can be shown to be mutually
+  * exclusive. Handles the combinations of ctype, character class and string
+  * nodes; asymmetric pairs are normalized by swapping x and y and retrying.
+  * Any combination that is not handled yields 0.
+  */
+ static int
+ is_not_included(Node* x, Node* y, regex_t* reg)
+ {
+ int i, len;
+ OnigCodePoint code;
+ UChar *p, c;
+ int ytype;
+
+ retry:
+ ytype = NTYPE(y);
+ switch (NTYPE(x)) {
+ case N_CTYPE:
+ {
+ switch (ytype) {
+ case N_CTYPE:
+ switch (NCTYPE(x).type) {
+ case CTYPE_WORD:
+ if (NCTYPE(y).type == CTYPE_NOT_WORD)
+ return 1;
+ else
+ return 0;
+ break;
+ case CTYPE_NOT_WORD:
+ if (NCTYPE(y).type == CTYPE_WORD)
+ return 1;
+ else
+ return 0;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ swap: /* also the jump target of the other asymmetric pairs below */
+ {
+ Node* tmp;
+ tmp = x; x = y; y = tmp;
+ goto retry;
+ }
+ break;
+
+ case N_STRING:
+ goto swap;
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ case N_CCLASS:
+ {
+ CClassNode* xc = &(NCCLASS(x));
+ switch (ytype) {
+ case N_CTYPE:
+ switch (NCTYPE(y).type) {
+ case CTYPE_WORD:
+ /* only decided for a non-negated class without a multibyte part */
+ if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) {
+ if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ }
+ }
+ return 1;
+ }
+ return 0;
+ break;
+ case CTYPE_NOT_WORD:
+ /* any single-byte non-word code matched by the class overlaps */
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
+ if (!IS_CCLASS_NOT(xc)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ else {
+ if (! BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ }
+ }
+ return 1;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ {
+ int v;
+ CClassNode* yc = &(NCCLASS(y));
+
+ /* a single-byte code matched by both classes means they overlap */
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ v = BITSET_AT(xc->bs, i);
+ if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
+ (v == 0 && IS_CCLASS_NOT(xc))) {
+ v = BITSET_AT(yc->bs, i);
+ if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
+ (v == 0 && IS_CCLASS_NOT(yc)))
+ return 0;
+ }
+ }
+ /* disjoint only if one side is non-negated with no multibyte part */
+ if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
+ return 1;
+ return 0;
+ }
+ break;
+
+ case N_STRING:
+ goto swap;
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ case N_STRING:
+ {
+ StrNode* xs = &(NSTRING(x));
+ if (NSTRING_LEN(x) == 0)
+ break;
+
+ c = *(xs->s); /* NOTE(review): `c` is not read again in this function */
+ switch (ytype) {
+ case N_CTYPE:
+ switch (NCTYPE(y).type) {
+ case CTYPE_WORD:
+ return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
+ break;
+ case CTYPE_NOT_WORD:
+ return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case N_CCLASS:
+ {
+ CClassNode* cc = &(NCCLASS(y));
+
+ /* NOTE(review): the end bound is s + MBC_MAXLEN, not xs->end */
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ }
+ break;
+
+ case N_STRING:
+ {
+ UChar *q;
+ StrNode* ys = &(NSTRING(y));
+ len = NSTRING_LEN(x);
+ if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ /* tiny version */
+ return 0;
+ }
+ else {
+ /* compare the common prefix; any differing byte means disjoint */
+ for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
+ if (*p != *q) return 1;
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+ }
+
+ /*
+  * Find the node that determines the first value matched by the pattern
+  * rooted at `node`.
+  *
+  *   exact - when non-zero, ctype/cclass nodes are not accepted and a
+  *           non-raw string under the ignore-case option is rejected
+  *   reg   - supplies the options; they are temporarily replaced while
+  *           descending into an option effect and restored afterwards
+  *
+  * Returns the node found, or NULL_NODE when the head cannot be pinned to a
+  * single node (back-reference, alternation, any-char, call, empty string,
+  * quantifier with lower == 0, ...).
+  */
+ static Node*
+ get_head_value_node(Node* node, int exact, regex_t* reg)
+ {
+ Node* n = NULL_NODE;
+
+ switch (NTYPE(node)) {
+ case N_BACKREF:
+ case N_ALT:
+ case N_ANYCHAR:
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+#endif
+ break;
+
+ case N_CTYPE:
+ case N_CCLASS:
+ if (exact == 0) {
+ n = node;
+ }
+ break;
+
+ case N_LIST:
+ /* only the first element of a list can be the head */
+ n = get_head_value_node(NCONS(node).left, exact, reg);
+ break;
+
+ case N_STRING:
+ {
+ StrNode* sn = &(NSTRING(node));
+
+ if (sn->end <= sn->s)
+ break;
+
+ if (exact != 0 &&
+ !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
+ /* the disabled code below leaves n as NULL_NODE in this case */
+#if 0
+ UChar* tmp = sn->s;
+ if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
+ &tmp, sn->end))
+ n = node;
+#endif
+ }
+ else {
+ n = node;
+ }
+ }
+ break;
+
+ case N_QUANTIFIER:
+ {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+ /* the target is only guaranteed to appear when lower > 0 */
+ if (qn->lower > 0) {
+ if (IS_NOT_NULL(qn->head_exact))
+ n = qn->head_exact;
+ else
+ n = get_head_value_node(qn->target, exact, reg);
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+ switch (en->type) {
+ case EFFECT_OPTION:
+ {
+ OnigOptionType options = reg->options;
+
+ reg->options = NEFFECT(node).option;
+ n = get_head_value_node(NEFFECT(node).target, exact, reg);
+ reg->options = options;
+ }
+ break;
+
+ case EFFECT_MEMORY:
+ case EFFECT_STOP_BACKTRACK:
+ n = get_head_value_node(en->target, exact, reg);
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ if (NANCHOR(node).type == ANCHOR_PREC_READ)
+ n = get_head_value_node(NANCHOR(node).target, exact, reg);
+ break;
+
+ default:
+ break;
+ }
+
+ return n;
+ }
+
+ /*
+  * Check that the tree rooted at `node` only uses permitted constructs.
+  *
+  * A node type outside `type_mask`, an effect type outside `effect_mask`
+  * or an anchor type outside `anchor_mask` makes the function return 1.
+  * Otherwise the children are checked recursively and the first non-zero
+  * result is returned; 0 means the whole tree passed.
+  */
+ static int
+ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
+ {
+   int kind = NTYPE(node);
+   int result = 0;
+
+   if ((kind & type_mask) == 0)
+     return 1;
+
+   if (kind == N_LIST || kind == N_ALT) {
+     for (;;) {
+       result = check_type_tree(NCONS(node).left, type_mask, effect_mask,
+                                anchor_mask);
+       if (result != 0) break;
+       node = NCONS(node).right;
+       if (IS_NULL(node)) break;
+     }
+   }
+   else if (kind == N_QUANTIFIER) {
+     result = check_type_tree(NQUANTIFIER(node).target, type_mask,
+                              effect_mask, anchor_mask);
+   }
+   else if (kind == N_EFFECT) {
+     EffectNode* effect = &(NEFFECT(node));
+
+     if ((effect->type & effect_mask) == 0)
+       return 1;
+     result = check_type_tree(effect->target, type_mask, effect_mask,
+                              anchor_mask);
+   }
+   else if (kind == N_ANCHOR) {
+     int anchor_kind = NANCHOR(node).type;
+
+     if ((anchor_kind & anchor_mask) == 0)
+       return 1;
+     /* plain anchors have no sub-pattern to inspect */
+     if (NANCHOR(node).target)
+       result = check_type_tree(NANCHOR(node).target,
+                                type_mask, effect_mask, anchor_mask);
+   }
+
+   return result;
+ }
+
+#ifdef USE_SUBEXP_CALL
+
+#define RECURSION_EXIST 1
+#define RECURSION_INFINITE 2
+
+ /*
+  * Look for recursion back into the group currently marked NST_MARK1.
+  *
+  *   head - non-zero while nothing with a non-zero minimum length has been
+  *          matched yet on the path from the marked group to `node`
+  *
+  * Returns 0 when the marked group is not reached, RECURSION_EXIST when it
+  * is reached with head == 0, RECURSION_INFINITE when it is reached with
+  * head != 0, or a negative value propagated from a callee.
+  * NST_MARK2 guards groups that are already being visited.
+  */
+ static int
+ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
+ {
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ Node *x;
+ OnigDistance min;
+ int ret;
+
+ x = node;
+ do {
+ ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r |= ret;
+ if (head) {
+ /* once an element must consume input, later ones are not at head */
+ ret = get_min_match_length(NCONS(x).left, &min, env);
+ if (ret != 0) return ret;
+ if (min != 0) head = 0;
+ }
+ } while (IS_NOT_NULL(x = NCONS(x).right));
+ }
+ break;
+
+ case N_ALT:
+ {
+ int ret;
+ /* recursion only counts if every branch recurses (results are ANDed) */
+ r = RECURSION_EXIST;
+ do {
+ ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r &= ret;
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_QUANTIFIER:
+ r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
+ if (r == RECURSION_EXIST) {
+ /* an optional target (lower == 0) can be skipped */
+ if (NQUANTIFIER(node).lower == 0) r = 0;
+ }
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = subexp_inf_recursive_check(an->target, env, head);
+ break;
+ }
+ }
+ break;
+
+ case N_CALL:
+ r = subexp_inf_recursive_check(NCALL(node).target, env, head);
+ break;
+
+ case N_EFFECT:
+ if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+ return 0;
+ else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+ return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
+ else {
+ SET_EFFECT_STATUS(node, NST_MARK2);
+ r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
+ CLEAR_EFFECT_STATUS(node, NST_MARK2);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Traverse the tree and, for every group flagged as recursive, verify that
+  * its recursion is not never-ending.
+  *
+  * Each such group is marked NST_MARK1 while subexp_inf_recursive_check()
+  * inspects its target; the mark is removed again before the result is
+  * examined.
+  *
+  * Returns 0 when no problem is found, ONIGERR_NEVER_ENDING_RECURSION when
+  * the check reports a positive result, and any negative code reported by
+  * the check unchanged (it is no longer overwritten by the traversal that
+  * follows).
+  */
+ static int
+ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
+ {
+   int type;
+   int r = 0;
+
+   type = NTYPE(node);
+   switch (type) {
+   case N_LIST:
+   case N_ALT:
+     do {
+       r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
+     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+     break;
+
+   case N_QUANTIFIER:
+     r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
+     break;
+
+   case N_ANCHOR:
+     {
+       AnchorNode* an = &(NANCHOR(node));
+       switch (an->type) {
+       case ANCHOR_PREC_READ:
+       case ANCHOR_PREC_READ_NOT:
+       case ANCHOR_LOOK_BEHIND:
+       case ANCHOR_LOOK_BEHIND_NOT:
+         r = subexp_inf_recursive_check_trav(an->target, env);
+         break;
+       }
+     }
+     break;
+
+   case N_EFFECT:
+     {
+       EffectNode* en = &(NEFFECT(node));
+
+       if (IS_EFFECT_RECURSION(en)) {
+         SET_EFFECT_STATUS(node, NST_MARK1);
+         r = subexp_inf_recursive_check(en->target, env, 1);
+         CLEAR_EFFECT_STATUS(node, NST_MARK1);
+         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
+         if (r < 0) return r; /* keep the callee's error code */
+       }
+       r = subexp_inf_recursive_check_trav(en->target, env);
+     }
+
+     break;
+
+   default:
+     break;
+   }
+
+   return r;
+ }
+
+ /*
+  * Report whether the group currently marked NST_MARK1 can be reached from
+  * `node`, i.e. whether that group is recursive.
+  *
+  * Call nodes on a path that reaches the marked group are flagged with
+  * SET_CALL_RECURSION. NST_MARK2 guards groups that are already being
+  * visited. Returns non-zero when the marked group is reached, 0 otherwise.
+  */
+ static int
+ subexp_recursive_check(Node* node)
+ {
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ case N_ALT:
+ /* every element is visited; the results are ORed together */
+ do {
+ r |= subexp_recursive_check(NCONS(node).left);
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_QUANTIFIER:
+ r = subexp_recursive_check(NQUANTIFIER(node).target);
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = subexp_recursive_check(an->target);
+ break;
+ }
+ }
+ break;
+
+ case N_CALL:
+ r = subexp_recursive_check(NCALL(node).target);
+ if (r != 0) SET_CALL_RECURSION(node);
+ break;
+
+ case N_EFFECT:
+ if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+ return 0;
+ else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+ return 1; /* recursion */
+ else {
+ SET_EFFECT_STATUS(node, NST_MARK2);
+ r = subexp_recursive_check(NEFFECT(node).target);
+ CLEAR_EFFECT_STATUS(node, NST_MARK2);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+
+ /*
+  * Traverse the tree and flag every called group that is recursive.
+  *
+  * For each group that is called but not yet flagged, the group is marked
+  * NST_MARK1 while subexp_recursive_check() inspects its target; a non-zero
+  * result sets NST_RECURSION on the group.
+  *
+  * Returns FOUND_CALLED_NODE when a called group is found in the visited
+  * subtree. A quantifier with upper == 0 whose target contains a called
+  * group gets is_refered set.
+  */
+ static int
+ subexp_recursive_check_trav(Node* node, ScanEnv* env)
+ {
+#define FOUND_CALLED_NODE 1
+
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ case N_ALT:
+ {
+ int ret;
+ do {
+ ret = subexp_recursive_check_trav(NCONS(node).left, env);
+ if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
+ else if (ret < 0) return ret;
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_QUANTIFIER:
+ r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
+ if (NQUANTIFIER(node).upper == 0) {
+ if (r == FOUND_CALLED_NODE)
+ NQUANTIFIER(node).is_refered = 1;
+ }
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = subexp_recursive_check_trav(an->target, env);
+ break;
+ }
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ if (! IS_EFFECT_RECURSION(en)) {
+ if (IS_EFFECT_CALLED(en)) {
+ SET_EFFECT_STATUS(node, NST_MARK1);
+ r = subexp_recursive_check(en->target);
+ if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
+ CLEAR_EFFECT_STATUS(node, NST_MARK1);
+ }
+ }
+ /* the result of the check above is replaced by the traversal result */
+ r = subexp_recursive_check_trav(en->target, env);
+ if (IS_EFFECT_CALLED(en))
+ r |= FOUND_CALLED_NODE;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Resolve every call node in the tree to the group node it refers to.
+  *
+  * The call name is first looked up as a group name (when USE_NAMED_GROUP
+  * is defined); if that yields nothing it is parsed as a group number.
+  * On success the call's ref_num and target are set, the target group is
+  * flagged NST_CALLED, the group's bit in env->bt_mem_start is turned on
+  * and the call inherits env->unset_addr_list.
+  *
+  * Returns 0 on success or one of ONIGERR_UNDEFINED_NAME_REFERENCE,
+  * ONIGERR_UNDEFINED_GROUP_REFERENCE, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
+  * and ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED.
+  */
+ static int
+ setup_subexp_call(Node* node, ScanEnv* env)
+ {
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ do {
+ r = setup_subexp_call(NCONS(node).left, env);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_ALT:
+ do {
+ r = setup_subexp_call(NCONS(node).left, env);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_QUANTIFIER:
+ r = setup_subexp_call(NQUANTIFIER(node).target, env);
+ break;
+ case N_EFFECT:
+ r = setup_subexp_call(NEFFECT(node).target, env);
+ break;
+
+ case N_CALL:
+ {
+ int n, num, *refs;
+ UChar *p;
+ CallNode* cn = &(NCALL(node));
+ Node** nodes = SCANENV_MEM_NODES(env);
+
+#ifdef USE_NAMED_GROUP
+ n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
+#else
+ n = -1;
+#endif
+ if (n <= 0) {
+ /* name not found, check group number. (?*ddd) */
+ p = cn->name;
+ num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
+ /* the whole name must be consumed as a positive number */
+ if (num <= 0 || p != cn->name_end) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+#ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+#endif
+ if (num > env->num_mem) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
+ cn->ref_num = num;
+ goto set_call_attr;
+ }
+ else if (n > 1) {
+ /* a call through a name defined more than once is rejected */
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else {
+ cn->ref_num = refs[0];
+ set_call_attr:
+ cn->target = nodes[cn->ref_num];
+ if (IS_NULL(cn->target)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ SET_EFFECT_STATUS(cn->target, NST_CALLED);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
+ cn->unset_addr_list = env->unset_addr_list;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = setup_subexp_call(an->target, env);
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+#endif
+
+ /* Divide alternatives of different lengths inside a look-behind, so that
+ each alternative gets its own look-behind anchor:
+ (?<=A|B) ==> (?<=A)|(?<=B)
+ (?<!A|B) ==> (?<!A)(?<!B)
+
+ `node` is the look-behind anchor; its contents are swapped with the
+ alternation it wraps, so the caller's pointer ends up designating the
+ alternation (or, for a negative look-behind, a list).
+ Returns 0, or ONIGERR_MEMORY when a new anchor node cannot be created.
+ */
+ static int
+ divide_look_behind_alternatives(Node* node)
+ {
+ Node tmp_node;
+ Node *head, *np, *insert_node;
+ AnchorNode* an = &(NANCHOR(node));
+ int anc_type = an->type;
+
+ head = an->target;
+ np = NCONS(head).left;
+ /* swap node contents: `node` becomes the alternation, `head` the anchor */
+ tmp_node = *node; *node = *head; *head = tmp_node;
+ NCONS(node).left = head;
+ NANCHOR(head).target = np;
+
+ /* wrap every remaining alternative in an anchor of the same type */
+ np = node;
+ while ((np = NCONS(np).right) != NULL_NODE) {
+ insert_node = onig_node_new_anchor(anc_type);
+ CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
+ NANCHOR(insert_node).target = NCONS(np).left;
+ NCONS(np).left = insert_node;
+ }
+
+ if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
+ np = node;
+ do {
+ np->type = N_LIST; /* alt -> list */
+ } while ((np = NCONS(np).right) != NULL_NODE);
+ }
+ return 0;
+ }
+
+ /*
+  * Prepare a look-behind anchor by determining the character length of its
+  * target.
+  *
+  * A fixed length is stored in the anchor's char_len. A variable-length
+  * target yields ONIGERR_INVALID_LOOK_BEHIND_PATTERN, except for a
+  * top-level alternation with branches of different lengths, which is split
+  * into one look-behind per branch when the syntax enables
+  * ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND. Any other code from the length
+  * computation is returned unchanged.
+  */
+ static int
+ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
+ {
+   AnchorNode* anchor = &(NANCHOR(node));
+   int char_count;
+   int status = get_char_length_tree(anchor->target, reg, &char_count);
+
+   switch (status) {
+   case 0:
+     anchor->char_len = char_count;
+     break;
+
+   case GET_CHAR_LEN_VARLEN:
+     status = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+     break;
+
+   case GET_CHAR_LEN_TOP_ALT_VARLEN:
+     status = IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)
+       ? divide_look_behind_alternatives(node)
+       : ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+     break;
+
+   default:
+     break;
+   }
+
+   return status;
+ }
+
+ /*
+  * Use knowledge of the node that follows (`next_node`) to optimize `node`.
+  *
+  * Applies to a greedy, infinite quantifier, looked up through any enclosing
+  * memory groups: records the exact head of the following node (when
+  * USE_QUANTIFIER_PEEK_NEXT is defined) and, when the quantified value and
+  * the head of the following node are mutually exclusive, wraps the
+  * quantifier in a stop-backtrack effect.
+  *
+  * Returns 0, or ONIGERR_MEMORY when the effect node cannot be created.
+  */
+ static int
+ next_setup(Node* node, Node* next_node, regex_t* reg)
+ {
+ int type;
+
+ retry:
+ type = NTYPE(node);
+ if (type == N_QUANTIFIER) {
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
+#ifdef USE_QUANTIFIER_PEEK_NEXT
+ qn->next_head_exact = get_head_value_node(next_node, 1, reg);
+#endif
+ /* automatic possessification a*b ==> (?>a*)b */
+ if (qn->lower <= 1) {
+ int ttype = NTYPE(qn->target);
+ if (IS_NODE_TYPE_SIMPLE(ttype)) {
+ Node *x, *y;
+ x = get_head_value_node(qn->target, 0, reg);
+ if (IS_NOT_NULL(x)) {
+ y = get_head_value_node(next_node, 0, reg);
+ if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
+ Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
+ SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
+ /* after the swap `node` is the new effect and `en` the quantifier */
+ swap_node(node, en);
+ NEFFECT(node).target = en;
+ }
+ }
+ }
+ }
+ }
+ }
+ else if (type == N_EFFECT) {
+ EffectNode* en = &(NEFFECT(node));
+ if (en->type == EFFECT_MEMORY) {
+ /* look through a capturing group at what it contains */
+ node = en->target;
+ goto retry;
+ }
+ }
+ return 0;
+ }
+
+
+ /*
+  * Append one segment [prev_start, prev) of a string being divided to the
+  * list under construction.
+  *
+  *   prev_ambig - non-zero when the segment is case-ambiguous; it is then
+  *                normalized in place (starting at prev_start) before the
+  *                new string node is created, and the node is flagged AMBIG
+  *                (plus AMBIG_REDUCE when normalization changed its length)
+  *   end        - end of the whole string, passed to the normalizer
+  *   tailp      - address of the pointer to the list's open tail link;
+  *                a null *tailp means the list is still empty
+  *   root       - receives the first list cell when the list is empty
+  *
+  * Returns 0 on success or ONIGERR_MEMORY. When the list cell cannot be
+  * allocated, the freshly created string node is released instead of being
+  * leaked.
+  */
+ static int
+ divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+                              UChar* prev_start, UChar* prev,
+                              UChar* end, Node*** tailp, Node** root)
+ {
+   UChar *tmp, *wp;
+   Node* snode;
+   Node* cell;
+
+   if (prev_ambig != 0) {
+     tmp = prev_start;
+     wp = prev_start;
+     while (tmp < prev) {
+       wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+                                      &tmp, end, wp);
+     }
+     snode = onig_node_new_str(prev_start, wp);
+     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+     NSTRING_SET_AMBIG(snode);
+     if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+   }
+   else {
+     snode = onig_node_new_str(prev_start, prev);
+     CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+   }
+
+   cell = onig_node_new_list(snode, NULL);
+   if (IS_NULL(cell)) {
+     /* nothing owns snode yet, so it must be released here */
+     onig_node_free(snode);
+     return ONIGERR_MEMORY;
+   }
+
+   if (*tailp == (Node** )0) {
+     *root = cell;
+   }
+   else {
+     **tailp = cell;
+   }
+   *tailp = &(NCONS(cell).right);
+
+   return 0;
+ }
+
+ /*
+  * Split a string node into runs of case-ambiguous and non-ambiguous
+  * characters.
+  *
+  * When the whole string is one run, the node is kept; if that run is
+  * ambiguous it is normalized in place and flagged AMBIG (plus
+  * AMBIG_REDUCE when its length changed). Otherwise a list of string nodes,
+  * one per run, is built and swapped into `node`, and the original string
+  * node is freed.
+  *
+  * Returns 0 on success, or the non-zero code reported while building the
+  * list; in that case the partially built list is released before
+  * returning.
+  */
+ static int
+ divide_ambig_string_node(Node* node, regex_t* reg)
+ {
+   StrNode* sn = &NSTRING(node);
+   int ambig, prev_ambig;
+   UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
+   Node *root = NULL_NODE;
+   Node **tailp = (Node** )0;
+   int r;
+
+   start = prev_start = p = sn->s;
+   end = sn->end;
+   if (p >= end) return 0;
+
+   prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
+
+   while (p < end) {
+     prev = p;
+     if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
+                                         reg->ambig_flag, &p, end))) {
+
+       /* ambiguity changed: close the run that ended at `prev` */
+       r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+                                        end, &tailp, &root);
+       if (r != 0) goto fail;
+
+       prev_ambig = ambig;
+       prev_start = prev;
+     }
+   }
+
+   if (prev_start == start) {
+     /* a single run: the node is kept and updated in place */
+     if (prev_ambig != 0) {
+       NSTRING_SET_AMBIG(node);
+       tmp = start;
+       wp = start;
+       while (tmp < end) {
+         wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+                                        &tmp, end, wp);
+       }
+       if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
+       sn->end = wp;
+     }
+   }
+   else {
+     r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+                                      end, &tailp, &root);
+     if (r != 0) goto fail;
+
+     swap_node(node, root);
+     onig_node_str_clear(root); /* should be after swap! */
+     onig_node_free(root); /* free original string node */
+   }
+
+   return 0;
+
+ fail:
+   /* release the runs collected so far; `node` itself is left in place */
+   if (IS_NOT_NULL(root)) onig_node_free(root);
+   return r;
+ }
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT 512
+#define CEC_INFINITE_NUM 0x7fffffff
+
+#define CEC_IN_INFINITE_REPEAT (1<<0)
+#define CEC_IN_FINITE_REPEAT (1<<1)
+#define CEC_CONT_BIG_REPEAT (1<<2)
+
+ /*
+  * Walk the tree and decide which quantifiers get a combination-explosion
+  * check number (qn->comb_exp_check_num).
+  *
+  * `state` is a set of CEC_* flags describing the enclosing repeats.  The
+  * return value is a CEC_* flag set that callers feed to the next list
+  * element; the list/alternative loops stop as soon as a negative value is
+  * returned.
+  */
+ static int
+ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+ {
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ /* NOTE(review): prev is assigned but never read in this case */
+ Node* prev = NULL_NODE;
+ do {
+ /* the state returned by one element is the input of the next one */
+ r = setup_comb_exp_check(NCONS(node).left, r, env);
+ prev = NCONS(node).left;
+ } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_ALT:
+ {
+ int ret;
+ do {
+ /* every branch starts from the incoming state; results are OR-ed */
+ ret = setup_comb_exp_check(NCONS(node).left, state, env);
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_QUANTIFIER:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ /* only done while no group is marked as back-referenced */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == N_EFFECT) {
+ EffectNode* en = &(NEFFECT(qn->target));
+ if (en->type == EFFECT_MEMORY) {
+ if (NTYPE(en->target) == N_QUANTIFIER) {
+ QuantifierNode* q = &(NQUANTIFIER(en->target));
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ /* shrink the upper bound to the lower bound (or to 1 if that is 0) */
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ /* inside a finite repeat: mark with -1 instead of numbering */
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ /* var_num: how many repetitions are optional */
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
+
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ /* assign the next check number once per quantifier */
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_MEMORY:
+ {
+ /* remember the largest group number met so far */
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
+ }
+ break;
+
+ #ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ /* recursive calls are only recorded, not followed */
+ if (IS_CALL_RECURSION(&(NCALL(node))))
+ env->has_recursion = 1;
+ else
+ r = setup_comb_exp_check(NCALL(node).target, state, env);
+ break;
+ #endif
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+#endif
+
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+
+/* setup_tree does the following work.
+ 1. check empty loop. (set qn->target_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+ */
+ /*
+  * Recursive tree preparation pass.
+  *
+  *   node   subtree to process (may be rewritten in place)
+  *   reg    regex being compiled; reg->options is temporarily replaced
+  *          while an option group is processed
+  *   state  IN_ALT / IN_NOT / IN_REPEAT / IN_VAR_REPEAT flags describing
+  *          the enclosing constructs
+  *   env    scan environment receiving the memory status bits
+  *
+  * Returns 0 on success or a negative ONIGERR_* code.
+  */
+ static int
+ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+ {
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_tree(NCONS(node).left, reg, state, env);
+ /* after each element, let next_setup() look at (previous, current) */
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NCONS(node).left, reg);
+ }
+ prev = NCONS(node).left;
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ }
+ break;
+
+ case N_ALT:
+ do {
+ r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ case N_CCLASS:
+ break;
+
+ case N_STRING:
+ /* non-raw strings under ignore-case are split into ambiguous runs */
+ if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
+ r = divide_ambig_string_node(node, reg);
+ }
+ break;
+
+ case N_CTYPE:
+ case N_ANYCHAR:
+ break;
+
+ #ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ break;
+ #endif
+
+ case N_BACKREF:
+ {
+ int i;
+ int* p;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BackrefNode* br = &(NBACKREF(node));
+ p = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ /* reject references to groups beyond env->num_mem */
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+ #ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
+ #endif
+ SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+ }
+ }
+ break;
+
+ case N_QUANTIFIER:
+ {
+ OnigDistance d;
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+ Node* target = qn->target;
+
+ if ((state & IN_REPEAT) != 0) {
+ qn->state |= NST_IN_REPEAT;
+ }
+
+ /* empty-loop check: a target whose minimum match length is 0 */
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ r = get_min_match_length(target, &d, env);
+ if (r) break;
+ if (d == 0) {
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+ #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ r = quantifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
+ #endif
+ #if 0
+ r = get_max_match_length(target, &d, env);
+ if (r == 0 && d == 0) {
+ /* ()* ==> ()?, ()+ ==> () */
+ qn->upper = 1;
+ if (qn->lower > 1) qn->lower = 1;
+ if (NTYPE(target) == N_STRING) {
+ qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
+ }
+ }
+ #endif
+ }
+ }
+
+ state |= IN_REPEAT;
+ if (qn->lower != qn->upper)
+ state |= IN_VAR_REPEAT;
+ r = setup_tree(target, reg, state, env);
+ if (r) break;
+
+ /* expand string */
+ /* a string repeated a fixed number of times is replaced by one longer
+    string node, provided the result stays within the length limit */
+ #define EXPAND_STRING_MAX_LENGTH 100
+ if (NTYPE(target) == N_STRING) {
+ if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int len = NSTRING_LEN(target);
+ StrNode* sn = &(NSTRING(target));
+
+ if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int i, n = qn->lower;
+ /* node itself becomes the string node; target is appended n times */
+ onig_node_conv_to_str_node(node, NSTRING(target).flag);
+ for (i = 0; i < n; i++) {
+ r = onig_node_str_cat(node, sn->s, sn->end);
+ if (r) break;
+ }
+ onig_node_free(target);
+ break; /* break case N_QUANTIFIER: */
+ }
+ }
+ }
+
+ #ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (qn->greedy && (qn->target_empty_info != 0)) {
+ if (NTYPE(target) == N_QUANTIFIER) {
+ /* take over head_exact from a directly nested quantifier */
+ QuantifierNode* tqn = &(NQUANTIFIER(target));
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ }
+ }
+ #endif
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_OPTION:
+ {
+ /* process the target under the group's options, then restore */
+ OnigOptionType options = reg->options;
+ reg->options = NEFFECT(node).option;
+ r = setup_tree(NEFFECT(node).target, reg, state, env);
+ reg->options = options;
+ }
+ break;
+
+ case EFFECT_MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
+ /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ }
+ r = setup_tree(en->target, reg, state, env);
+ break;
+
+ case EFFECT_STOP_BACKTRACK:
+ {
+ Node* target = en->target;
+ r = setup_tree(target, reg, state, env);
+ /* NOTE(review): target is inspected even when r != 0 */
+ if (NTYPE(target) == N_QUANTIFIER) {
+ QuantifierNode* tqn = &(NQUANTIFIER(target));
+ if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ tqn->greedy != 0) { /* (?>a*), a*+ etc... */
+ int qtype = NTYPE(tqn->target);
+ if (IS_NODE_TYPE_SIMPLE(qtype))
+ SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+ }
+ }
+ }
+ break;
+ }
+ }
+ break;
+
+ case N_ANCHOR:
+ {
+ AnchorNode* an = &(NANCHOR(node));
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ r = setup_tree(an->target, reg, state, env);
+ break;
+ case ANCHOR_PREC_READ_NOT:
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ break;
+
+ /* allowed node types in look-behind */
+ #define ALLOWED_TYPE_IN_LB \
+ ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
+ N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
+
+ #define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
+ #define ALLOWED_EFFECT_IN_LB_NOT 0
+
+ #define ALLOWED_ANCHOR_IN_LB \
+ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+ #define ALLOWED_ANCHOR_IN_LB_NOT \
+ ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+
+ case ANCHOR_LOOK_BEHIND:
+ {
+ /* check_type_tree: < 0 is an error, > 0 means a disallowed node */
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, state, env);
+ }
+ break;
+
+ case ANCHOR_LOOK_BEHIND_NOT:
+ {
+ /* same checks as above with the sets for negative look-behind */
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ }
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Build the skip table for the Boyer-Moore search of the pattern [s, end).
+  *
+  * Every entry starts out as the pattern length; each byte of the pattern
+  * except the last one then gets its distance to the last position.
+  * Patterns shorter than ONIG_CHAR_TABLE_SIZE use the UChar table `skip`;
+  * longer ones use an int table stored in *int_skip, allocated here when
+  * it is still null.
+  *
+  * Returns 0, or ONIGERR_MEMORY if that allocation fails.
+  */
+ static int
+ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
+             UChar skip[], int** int_skip)
+ {
+   int pos;
+   int n = end - s;
+
+   if (n >= ONIG_CHAR_TABLE_SIZE) {
+     /* a length this large does not fit a UChar entry: use the int table */
+     if (IS_NULL(*int_skip)) {
+       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
+     }
+
+     for (pos = 0; pos < ONIG_CHAR_TABLE_SIZE; pos++)
+       (*int_skip)[pos] = n;
+
+     for (pos = 0; pos < n - 1; pos++)
+       (*int_skip)[s[pos]] = n - 1 - pos;
+
+     return 0;
+   }
+
+   for (pos = 0; pos < ONIG_CHAR_TABLE_SIZE; pos++)
+     skip[pos] = n;
+
+   for (pos = 0; pos < n - 1; pos++)
+     skip[s[pos]] = n - 1 - pos;
+
+   return 0;
+ }
+
+#define OPT_EXACT_MAXLEN 24
+
+ /* A [min, max] pair of byte distances / lengths. */
+ typedef struct {
+ OnigDistance min; /* min byte length */
+ OnigDistance max; /* max byte length */
+ } MinMaxLen;
+
+ /* Context handed down the tree while optimization info is collected. */
+ typedef struct {
+ MinMaxLen mmd; /* distance range accumulated for the nodes to the left */
+ OnigEncoding enc;
+ OnigOptionType options; /* options in effect; swapped inside option groups */
+ OnigAmbigType ambig_flag;
+ ScanEnv* scan_env;
+ } OptEnv;
+
+ /* Anchor bit sets; is_left_anchor() decides which member a bit goes to. */
+ typedef struct {
+ int left_anchor;
+ int right_anchor;
+ } OptAncInfo;
+
+ /* An exact byte string collected for search optimization. */
+ typedef struct {
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int reach_end; /* non-zero while s still covers its node up to the end */
+ int ignore_case;
+ int len; /* number of bytes used in s */
+ UChar s[OPT_EXACT_MAXLEN];
+ } OptExactInfo;
+
+ /*
+  * A set of candidate bytes collected for search optimization.
+  * Member order matters: clear_opt_map_info() uses a positional initializer.
+  */
+ typedef struct {
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int value; /* weighted value */
+ UChar map[ONIG_CHAR_TABLE_SIZE]; /* map[c] != 0 when byte c is in the set */
+ } OptMapInfo;
+
+ /* Everything optimize_node_left() reports about one subtree. */
+ typedef struct {
+ MinMaxLen len; /* min/max length of the subtree itself */
+
+ OptAncInfo anc;
+ OptExactInfo exb; /* boundary */
+ OptExactInfo exm; /* middle */
+ OptExactInfo expr; /* prec read (?=...) */
+
+ OptMapInfo map; /* boundary */
+ } NodeOptInfo;
+
+
+ /*
+  * Return the heuristic weight of byte value i.
+  *
+  * Values covered by ByteValTable use the table, except that byte 0 is
+  * weighted 20 when the encoding's minimum character length exceeds one.
+  * Any other value is weighted 4.
+  */
+ static int
+ map_position_value(OnigEncoding enc, int i)
+ {
+   static const short int ByteValTable[] = {
+      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
+      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
+      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
+      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
+   };
+
+   /* the comparison is done in the unsigned type of sizeof */
+   if (i >= sizeof(ByteValTable)/sizeof(ByteValTable[0]))
+     return 4; /* Take it easy. */
+
+   if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
+     return 20;
+
+   return (int )ByteValTable[i];
+ }
+
+ /*
+  * Score a distance range: the narrower (max - min), the higher the score.
+  * An infinite max scores 0; a spread beyond the table scores 1.
+  */
+ static int
+ distance_value(MinMaxLen* mm)
+ {
+   /* 1000 / (min-max-dist + 1) */
+   static const short int dist_vals[] = {
+     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
+       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
+       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
+       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
+       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
+       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
+       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
+       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
+       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
+       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
+   };
+
+   int spread;
+
+   if (mm->max == ONIG_INFINITE_DISTANCE)
+     return 0;
+
+   spread = mm->max - mm->min;
+   if (spread >= sizeof(dist_vals)/sizeof(dist_vals[0]))
+     return 1;
+
+   return (int )dist_vals[spread];
+ }
+
+ /*
+  * Compare two candidates by value weighted with distance_value().
+  * Returns > 0 when the second candidate (d2, v2) wins, < 0 when the first
+  * one wins and 0 on a complete tie.  A non-positive v2 always loses; a
+  * non-positive v1 loses otherwise.  Equal scores are decided by the
+  * smaller minimum distance.
+  */
+ static int
+ comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
+ {
+   int score1, score2;
+
+   if (v2 <= 0) return -1;
+   if (v1 <= 0) return 1;
+
+   score1 = v1 * distance_value(d1);
+   score2 = v2 * distance_value(d2);
+
+   if (score1 != score2)
+     return (score2 > score1) ? 1 : -1;
+
+   if (d1->min != d2->min)
+     return (d2->min < d1->min) ? 1 : -1;
+
+   return 0;
+ }
+
+ /* Return 1 when both ranges have the same min and the same max, else 0. */
+ static int
+ is_equal_mml(MinMaxLen* a, MinMaxLen* b)
+ {
+   if (a->min != b->min) return 0;
+   if (a->max != b->max) return 0;
+   return 1;
+ }
+
+
+ /* Store the given bounds in *mml. */
+ static void
+ set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
+ {
+   mml->max = max;
+   mml->min = min;
+ }
+
+ /* Reset both bounds of *mml to zero. */
+ static void
+ clear_mml(MinMaxLen* mml)
+ {
+   mml->max = 0;
+   mml->min = 0;
+ }
+
+ /* Copy both bounds from *from to *to. */
+ static void
+ copy_mml(MinMaxLen* to, MinMaxLen* from)
+ {
+   *to = *from;
+ }
+
+ /* Add the bounds of *from to those of *to using distance_add(). */
+ static void
+ add_mml(MinMaxLen* to, MinMaxLen* from)
+ {
+   OnigDistance lo = distance_add(to->min, from->min);
+   OnigDistance hi = distance_add(to->max, from->max);
+
+   to->min = lo;
+   to->max = hi;
+ }
+
+ #if 0
+ /* Compiled out: would add the same length to both bounds of *to. */
+ static void
+ add_len_mml(MinMaxLen* to, OnigDistance len)
+ {
+ to->min = distance_add(to->min, len);
+ to->max = distance_add(to->max, len);
+ }
+ #endif
+
+ /* Widen *to so that it also covers *from (smaller min, larger max). */
+ static void
+ alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
+ {
+   if (from->min < to->min) {
+     to->min = from->min;
+   }
+   if (from->max > to->max) {
+     to->max = from->max;
+   }
+ }
+
+ /* Copy every member of *from into *to. */
+ static void
+ copy_opt_env(OptEnv* to, OptEnv* from)
+ {
+   to->mmd        = from->mmd;
+   to->enc        = from->enc;
+   to->options    = from->options;
+   to->ambig_flag = from->ambig_flag;
+   to->scan_env   = from->scan_env;
+ }
+
+ /* Drop all anchor bits on both sides. */
+ static void
+ clear_opt_anc_info(OptAncInfo* anc)
+ {
+   anc->left_anchor = anc->right_anchor = 0;
+ }
+
+ /* Copy both anchor bit sets from *from to *to. */
+ static void
+ copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
+ {
+   to->left_anchor  = from->left_anchor;
+   to->right_anchor = from->right_anchor;
+ }
+
+ /*
+  * Compute the anchors of "left followed by right" into *to.
+  *
+  * The left anchors of `left` always apply; those of `right` apply as well
+  * when left_len is 0.  Symmetrically, the right anchors of `right` always
+  * apply and those of `left` only when right_len is 0.
+  */
+ static void
+ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
+                     OnigDistance left_len, OnigDistance right_len)
+ {
+   clear_opt_anc_info(to);
+
+   if (left_len == 0)
+     to->left_anchor = left->left_anchor | right->left_anchor;
+   else
+     to->left_anchor = left->left_anchor;
+
+   if (right_len == 0)
+     to->right_anchor = right->right_anchor | left->right_anchor;
+   else
+     to->right_anchor = right->right_anchor;
+ }
+
+ /*
+  * Return 0 for the anchor kinds stored on the right side (end-of-buffer,
+  * semi-end-of-buffer, end-of-line and both look-ahead kinds), 1 otherwise.
+  */
+ static int
+ is_left_anchor(int anc)
+ {
+   return (anc != ANCHOR_END_BUF       &&
+           anc != ANCHOR_SEMI_END_BUF  &&
+           anc != ANCHOR_END_LINE      &&
+           anc != ANCHOR_PREC_READ     &&
+           anc != ANCHOR_PREC_READ_NOT) ? 1 : 0;
+ }
+
+ /* Return 1 when any bit of `anc` is set on either side of *to, else 0. */
+ static int
+ is_set_opt_anc_info(OptAncInfo* to, int anc)
+ {
+   int either_side = to->left_anchor | to->right_anchor;
+
+   return ((either_side & anc) != 0) ? 1 : 0;
+ }
+
+ /* Set the bits of `anc` on the side chosen by is_left_anchor(). */
+ static void
+ add_opt_anc_info(OptAncInfo* to, int anc)
+ {
+   int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+   *side |= anc;
+ }
+
+ /* Clear the bits of `anc` on the side chosen by is_left_anchor(). */
+ static void
+ remove_opt_anc_info(OptAncInfo* to, int anc)
+ {
+   int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+   *side &= ~anc;
+ }
+
+ /* Keep only the anchor bits present in both *to and *add. */
+ static void
+ alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
+ {
+   to->right_anchor = to->right_anchor & add->right_anchor;
+   to->left_anchor  = to->left_anchor  & add->left_anchor;
+ }
+
+ /* Return 1 when the exact buffer holds OPT_EXACT_MAXLEN bytes or more. */
+ static int
+ is_full_opt_exact_info(OptExactInfo* ex)
+ {
+   if (ex->len >= OPT_EXACT_MAXLEN)
+     return 1;
+
+   return 0;
+ }
+
+ /* Reset *ex to the empty state: no bytes, no anchors, flags cleared. */
+ static void
+ clear_opt_exact_info(OptExactInfo* ex)
+ {
+   ex->len         = 0;
+   ex->s[0]        = '\0';
+   ex->ignore_case = 0;
+   ex->reach_end   = 0;
+
+   clear_opt_anc_info(&ex->anc);
+   clear_mml(&ex->mmd);
+ }
+
+ /* Copy the whole record, including the byte buffer, from *from to *to. */
+ static void
+ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
+ {
+ *to = *from;
+ }
+
+ /*
+  * Append the bytes of *add to *to, one whole character at a time, as far
+  * as OPT_EXACT_MAXLEN allows.
+  *
+  * A case-sensitive *to is left untouched when *add is case-insensitive
+  * and not longer than *to; otherwise *to becomes case-insensitive too.
+  * reach_end is inherited from *add only if all of its bytes were copied,
+  * and the right anchors are dropped when reach_end ends up cleared.
+  */
+ static void
+ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
+ {
+   int used, k, clen;
+   UChar *src, *src_end;
+   OptAncInfo merged;
+
+   if (! to->ignore_case && add->ignore_case) {
+     if (to->len >= add->len) return ; /* avoid */
+
+     to->ignore_case = 1;
+   }
+
+   src     = add->s;
+   src_end = src + add->len;
+   used    = to->len;
+   while (src < src_end) {
+     clen = enc_len(enc, src);
+     if (used + clen > OPT_EXACT_MAXLEN) break;   /* character would not fit */
+     for (k = 0; k < clen && src < src_end; k++)
+       to->s[used++] = *src++;
+   }
+
+   to->len = used;
+   if (src == src_end)
+     to->reach_end = add->reach_end;
+   else
+     to->reach_end = 0;
+
+   concat_opt_anc_info(&merged, &to->anc, &add->anc, 1, 1);
+   if (! to->reach_end) merged.right_anchor = 0;
+   copy_opt_anc_info(&to->anc, &merged);
+ }
+
+ /*
+  * Append the bytes [s, end) to *to, one whole character at a time, until
+  * the input is exhausted or the buffer cannot take the next character.
+  * Only to->s and to->len are updated; `raw` is not consulted.
+  */
+ static void
+ concat_opt_exact_info_str(OptExactInfo* to,
+                           UChar* s, UChar* end, int raw, OnigEncoding enc)
+ {
+   int used = to->len;
+   int k, clen;
+   UChar *src = s;
+
+   while (src < end && used < OPT_EXACT_MAXLEN) {
+     clen = enc_len(enc, src);
+     if (used + clen > OPT_EXACT_MAXLEN) break;
+     for (k = 0; k < clen && src < end; k++)
+       to->s[used++] = *src++;
+   }
+
+   to->len = used;
+ }
+
+ /*
+  * Merge the exact info of two alternatives into *to.
+  *
+  * The result is empty when either side is empty or their positions
+  * differ.  Otherwise *to is cut down to the prefix, in whole characters,
+  * that both strings share; reach_end survives only if both strings were
+  * consumed completely and *add had it set.
+  */
+ static void
+ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
+ {
+   int pos, k, clen;
+
+   if (add->len == 0 || to->len == 0 || ! is_equal_mml(&to->mmd, &add->mmd)) {
+     clear_opt_exact_info(to);
+     return ;
+   }
+
+   pos = 0;
+   while (pos < to->len && pos < add->len) {
+     if (to->s[pos] != add->s[pos]) break;
+     clen = enc_len(env->enc, to->s + pos);
+
+     /* the remaining bytes of this character must match as well */
+     k = 1;
+     while (k < clen && to->s[pos+k] == add->s[pos+k])
+       k++;
+     if (k < clen) break;
+
+     pos += clen;
+   }
+
+   if (! add->reach_end || pos < add->len || pos < to->len) {
+     to->reach_end = 0;
+   }
+   to->len = pos;
+   to->ignore_case |= add->ignore_case;
+
+   alt_merge_opt_anc_info(&to->anc, &add->anc);
+   if (! to->reach_end) to->anc.right_anchor = 0;
+ }
+
+ /*
+  * Keep the better of the two exact strings in *now.
+  *
+  * An empty *alt never wins and an empty *now always loses.  Otherwise
+  * each side gets a score (its length, or a byte-based score when both
+  * are at most two bytes long), doubled for case-sensitive strings, and
+  * comp_distance_value() decides.
+  */
+ static void
+ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
+ {
+   int now_score = now->len;
+   int alt_score = alt->len;
+
+   if (alt_score == 0)
+     return ;
+
+   if (now_score == 0) {
+     copy_opt_exact_info(now, alt);
+     return ;
+   }
+
+   if (now_score <= 2 && alt_score <= 2) {
+     /* ByteValTable[x] is big value --> low price */
+     /* each side is scored with the first byte of the other side */
+     alt_score = map_position_value(enc, now->s[0]);
+     now_score = map_position_value(enc, alt->s[0]);
+
+     if (now->len > 1) now_score += 5;
+     if (alt->len > 1) alt_score += 5;
+   }
+
+   if (now->ignore_case == 0) now_score *= 2;
+   if (alt->ignore_case == 0) alt_score *= 2;
+
+   if (comp_distance_value(&now->mmd, &alt->mmd, now_score, alt_score) > 0)
+     copy_opt_exact_info(now, alt);
+ }
+
+ /* Reset *map: position and anchors cleared, value 0, empty byte set. */
+ static void
+ clear_opt_map_info(OptMapInfo* map)
+ {
+   /* members without an explicit initializer are zero-initialized, so
+      {0} clears every entry of the byte map */
+   static const OptMapInfo clean_info = {
+     {0, 0}, {0, 0}, 0,
+     { 0 }
+   };
+
+   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
+ }
+
+ /* Copy the whole record, including the byte map, from *from to *to. */
+ static void
+ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
+ {
+ *to = *from;
+ }
+
+ /*
+  * Put byte c into the set.  A byte that is already present is not
+  * counted again; a new one adds its map_position_value() to the value.
+  */
+ static void
+ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
+ {
+   if (map->map[c] != 0)
+     return ;
+
+   map->map[c] = 1;
+   map->value += map_position_value(enc, c);
+ }
+
+ /*
+  * Add the first byte of the character at p to the set and, for every
+  * ambiguity type enabled in ambig_flag, the first byte of each code the
+  * encoding pairs with that character.
+  *
+  * Returns 0, or the negative value reported by ONIGENC_CODE_TO_MBC.
+  */
+ static int
+ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
+                           OnigEncoding enc, OnigAmbigType ambig_flag)
+ {
+   int k, npairs, mblen;
+   UChar mbc[ONIGENC_MBC_NORMALIZE_MAXLEN];
+   OnigCodePoint code;
+   const OnigPairAmbigCodes* pairs;
+   OnigAmbigType bit;
+
+   add_char_opt_map_info(map, p[0], enc);
+   code = ONIGENC_MBC_TO_CODE(enc, p, end);
+
+   bit = 0x01;
+   while (bit <= ONIGENC_AMBIGUOUS_MATCH_LIMIT) {
+     if ((bit & ambig_flag) != 0) {
+       npairs = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, bit, &pairs);
+       for (k = 0; k < npairs; k++) {
+         if (pairs[k].from != code) continue;
+
+         mblen = ONIGENC_CODE_TO_MBC(enc, pairs[k].to, mbc);
+         if (mblen < 0) return mblen;
+         add_char_opt_map_info(map, mbc[0], enc);
+       }
+     }
+     bit <<= 1;
+   }
+
+   return 0;
+ }
+
+ /*
+  * Keep the better of the two byte sets in *now.  An empty *alt never
+  * wins and an empty *now always loses; otherwise the sets are scored
+  * inversely to their value and compared with comp_distance_value().
+  */
+ static void
+ select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
+ {
+   static int z = 1<<15; /* 32768: something big value */
+
+   int now_score, alt_score;
+
+   if (alt->value == 0) return ;
+
+   if (now->value != 0) {
+     now_score = z / now->value;
+     alt_score = z / alt->value;
+     if (comp_distance_value(&now->mmd, &alt->mmd, now_score, alt_score) <= 0)
+       return ;
+   }
+
+   copy_opt_map_info(now, alt);
+ }
+
+ /*
+  * Compare an exact string with a byte set.  Returns -1 when the set has
+  * no positive value, otherwise the result of comp_distance_value() where
+  * a value > 0 means the set is preferred.
+  */
+ static int
+ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
+ {
+ #define COMP_EM_BASE 20
+   int exact_score, map_score;
+   int case_weight = 2;
+
+   if (m->value <= 0) return -1;
+
+   if (e->ignore_case) case_weight = 1;
+
+   exact_score = COMP_EM_BASE * e->len * case_weight;
+   map_score   = COMP_EM_BASE * 5 * 2 / m->value;
+   return comp_distance_value(&e->mmd, &m->mmd, exact_score, map_score);
+ }
+
+ /*
+  * Merge the byte set of another alternative into *to.
+  *
+  * An empty *to stays empty.  *to is cleared when *add is empty or starts
+  * beyond the maximum position of *to.  Otherwise the positions are
+  * widened, the sets are united, the value is recomputed from scratch and
+  * only the anchors common to both sides are kept.
+  */
+ static void
+ alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
+ {
+   int c, total;
+
+   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+   if (to->value == 0) return ;
+   if (add->value == 0 || to->mmd.max < add->mmd.min) {
+     clear_opt_map_info(to);
+     return ;
+   }
+
+   alt_merge_mml(&to->mmd, &add->mmd);
+
+   total = 0;
+   for (c = 0; c < ONIG_CHAR_TABLE_SIZE; c++) {
+     if (add->map[c] != 0)
+       to->map[c] = 1;
+
+     if (to->map[c] == 0) continue;
+     total += map_position_value(enc, c);
+   }
+   to->value = total;
+
+   alt_merge_opt_anc_info(&to->anc, &add->anc);
+ }
+
+ /* Stamp the position *mmd on the exb, expr and map records of *opt. */
+ static void
+ set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
+ {
+   copy_mml(&opt->map.mmd,  mmd);
+   copy_mml(&opt->expr.mmd, mmd);
+   copy_mml(&opt->exb.mmd,  mmd);
+ }
+
+ /* Reset every part of *opt to its empty state. */
+ static void
+ clear_node_opt_info(NodeOptInfo* opt)
+ {
+   clear_opt_map_info(&opt->map);
+
+   clear_opt_exact_info(&opt->expr);
+   clear_opt_exact_info(&opt->exm);
+   clear_opt_exact_info(&opt->exb);
+
+   clear_opt_anc_info(&opt->anc);
+   clear_mml(&opt->len);
+ }
+
+ /* Copy the whole record (all nested infos) from *from to *to. */
+ static void
+ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
+ {
+ *to = *from;
+ }
+
+ /*
+  * Fold the info of the next list element (*add) into the info collected
+  * so far for the elements to its left (*to).
+  *
+  * Note that *add is modified as well: its exb/map anchors may be updated
+  * and its exb is cleared once it has been appended to *to.
+  */
+ static void
+ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
+ {
+ int exb_reach, exm_reach;
+ OptAncInfo tanc;
+
+ concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
+ copy_opt_anc_info(&to->anc, &tanc);
+
+ /* nothing of positive length on the left: its anchors also apply to add->exb */
+ if (add->exb.len > 0 && to->len.max == 0) {
+ concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
+ to->len.max, add->len.max);
+ copy_opt_anc_info(&add->exb.anc, &tanc);
+ }
+
+ if (add->map.value > 0 && to->len.max == 0) {
+ if (add->map.mmd.max == 0)
+ add->map.anc.left_anchor |= to->anc.left_anchor;
+ }
+
+ /* remember the flags before they are reset below */
+ exb_reach = to->exb.reach_end;
+ exm_reach = to->exm.reach_end;
+
+ if (add->len.max != 0)
+ to->exb.reach_end = to->exm.reach_end = 0;
+
+ /* extend whichever exact string still reached the end of the left part */
+ if (add->exb.len > 0) {
+ if (exb_reach) {
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
+ clear_opt_exact_info(&add->exb);
+ }
+ else if (exm_reach) {
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
+ clear_opt_exact_info(&add->exb);
+ }
+ }
+ /* add's own candidates compete for the "middle" slot */
+ select_opt_exact_info(enc, &to->exm, &add->exb);
+ select_opt_exact_info(enc, &to->exm, &add->exm);
+
+ if (to->expr.len > 0) {
+ if (add->len.max > 0) {
+ /* the look-ahead string is cut to the maximum length of add */
+ if (to->expr.len > (int )add->len.max)
+ to->expr.len = add->len.max;
+
+ if (to->expr.mmd.max == 0)
+ select_opt_exact_info(enc, &to->exb, &to->expr);
+ else
+ select_opt_exact_info(enc, &to->exm, &to->expr);
+ }
+ }
+ else if (add->expr.len > 0) {
+ copy_opt_exact_info(&to->expr, &add->expr);
+ }
+
+ select_opt_map_info(&to->map, &add->map);
+
+ add_mml(&to->len, &add->len);
+ }
+
+ /* Merge the info of another alternative (*add) into *to, part by part. */
+ static void
+ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
+ {
+   alt_merge_mml(&to->len, &add->len);
+   alt_merge_opt_anc_info(&to->anc, &add->anc);
+
+   alt_merge_opt_exact_info(&to->exb,  &add->exb,  env);
+   alt_merge_opt_exact_info(&to->exm,  &add->exm,  env);
+   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
+
+   alt_merge_opt_map_info(env->enc, &to->map, &add->map);
+ }
+
+
+#define MAX_NODE_OPT_INFO_REF_COUNT 5
+
+ /*
+  * Collect search-optimization info for the subtree `node` into *opt.
+  *
+  *   opt  cleared first, then stamped with the position env->mmd
+  *   env  encoding, options and the distance range accumulated for the
+  *        nodes to the left of `node`
+  *
+  * Returns 0 on success, a negative ONIGERR_* code from a helper, or
+  * ONIGERR_TYPE_BUG for an unknown node type.
+  */
+ static int
+ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
+ {
+ int type;
+ int r = 0;
+
+ clear_node_opt_info(opt);
+ set_bound_node_opt_info(opt, &env->mmd);
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ OptEnv nenv;
+ NodeOptInfo nopt;
+ Node* nd = node;
+
+ /* nenv.mmd grows by the length of every element already processed */
+ copy_opt_env(&nenv, env);
+ do {
+ r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
+ if (r == 0) {
+ add_mml(&nenv.mmd, &nopt.len);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ }
+ } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
+ }
+ break;
+
+ case N_ALT:
+ {
+ NodeOptInfo nopt;
+ Node* nd = node;
+
+ /* the first branch seeds opt, later branches are merged into it */
+ do {
+ r = optimize_node_left(NCONS(nd).left, &nopt, env);
+ if (r == 0) {
+ if (nd == node) copy_node_opt_info(opt, &nopt);
+ else alt_merge_node_opt_info(opt, &nopt, env);
+ }
+ } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
+ }
+ break;
+
+ case N_STRING:
+ {
+ StrNode* sn = &(NSTRING(node));
+ int slen = sn->end - sn->s;
+ int is_raw = NSTRING_IS_RAW(node);
+
+ if (! NSTRING_IS_AMBIG(node)) {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ NSTRING_IS_RAW(node), env->enc);
+ if (slen > 0) {
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
+ }
+ set_mml(&opt->len, slen, slen);
+ }
+ else {
+ int n, max;
+
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
+
+ if (slen > 0) {
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->ambig_flag);
+ if (r != 0) break;
+ }
+
+ /* a reduced string may match text longer than its own bytes */
+ if (NSTRING_IS_AMBIG_REDUCE(node)) {
+ n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ }
+ else {
+ max = slen;
+ }
+ set_mml(&opt->len, slen, max);
+ }
+
+ /* the whole string fitted into the exact buffer */
+ if (opt->exb.len == slen)
+ opt->exb.reach_end = 1;
+ }
+ break;
+
+ case N_CCLASS:
+ {
+ int i, z;
+ CClassNode* cc = &(NCCLASS(node));
+
+ /* no need to check ignore case. (setted in setup_tree()) */
+
+ if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ set_mml(&opt->len, min, max);
+ }
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = BITSET_AT(cc->bs, i);
+ if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ set_mml(&opt->len, 1, 1);
+ }
+ }
+ break;
+
+ case N_CTYPE:
+ {
+ int i, min, max;
+
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ /* a byte map is only built when max is 1 */
+ if (max == 1) {
+ min = 1;
+
+ switch (NCTYPE(node).type) {
+ case CTYPE_NOT_WORD:
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ break;
+
+ case CTYPE_WORD:
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ break;
+ }
+ }
+ else {
+ min = ONIGENC_MBC_MINLEN(env->enc);
+ }
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case N_ANYCHAR:
+ {
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case N_ANCHOR:
+ switch (NANCHOR(node).type) {
+ case ANCHOR_BEGIN_BUF:
+ case ANCHOR_BEGIN_POSITION:
+ case ANCHOR_BEGIN_LINE:
+ case ANCHOR_END_BUF:
+ case ANCHOR_SEMI_END_BUF:
+ case ANCHOR_END_LINE:
+ add_opt_anc_info(&opt->anc, NANCHOR(node).type);
+ break;
+
+ case ANCHOR_PREC_READ:
+ {
+ NodeOptInfo nopt;
+
+ /* keep an exact string / map of the look-ahead body; opt->len stays 0 */
+ r = optimize_node_left(NANCHOR(node).target, &nopt, env);
+ if (r == 0) {
+ if (nopt.exb.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exb);
+ else if (nopt.exm.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exm);
+
+ opt->expr.reach_end = 0;
+
+ if (nopt.map.value > 0)
+ copy_opt_map_info(&opt->map, &nopt.map);
+ }
+ }
+ break;
+
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
+ case ANCHOR_LOOK_BEHIND_NOT:
+ break;
+ }
+ break;
+
+ case N_BACKREF:
+ {
+ int i;
+ int* backs;
+ OnigDistance min, max, tmin, tmax;
+ Node** nodes = SCANENV_MEM_NODES(env->scan_env);
+ BackrefNode* br = &(NBACKREF(node));
+
+ if (br->state & NST_RECURSION) {
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
+ }
+ /* length range = smallest min / largest max over all referenced groups */
+ backs = BACKREFS_P(br);
+ r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
+ if (r != 0) break;
+ for (i = 1; i < br->back_num; i++) {
+ r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
+ if (r != 0) break;
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
+ }
+ if (r == 0) set_mml(&opt->len, min, max);
+ }
+ break;
+
+ #ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&(NCALL(node))))
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ else {
+ /* analyse the called group under its own options, then restore */
+ OnigOptionType save = env->options;
+ env->options = NEFFECT(NCALL(node).target).option;
+ r = optimize_node_left(NCALL(node).target, opt, env);
+ env->options = save;
+ }
+ break;
+ #endif
+
+ case N_QUANTIFIER:
+ {
+ int i;
+ OnigDistance min, max;
+ NodeOptInfo nopt;
+ QuantifierNode* qn = &(NQUANTIFIER(node));
+
+ r = optimize_node_left(qn->target, &nopt, env);
+ if (r) break;
+
+ if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
+ /* greedy .* with nothing of positive length before it */
+ if (env->mmd.max == 0 &&
+ NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
+ if (IS_MULTILINE(env->options))
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
+ else
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+ }
+ }
+ else {
+ if (qn->lower > 0) {
+ copy_node_opt_info(opt, &nopt);
+ if (nopt.exb.len > 0) {
+ if (nopt.exb.reach_end) {
+ /* opt->exb already holds one copy of the target's string; append
+    copies 2..lower.  The bound must be inclusive: with "<" only
+    lower-1 copies were collected while reach_end stayed set, so a
+    following string was glued on at the wrong place
+    (e.g. (a){2}b produced the search string "ab"). */
+ for (i = 2; i <= qn->lower &&
+ ! is_full_opt_exact_info(&opt->exb); i++) {
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
+ }
+ if (i < qn->lower) {
+ opt->exb.reach_end = 0;
+ }
+ }
+ }
+
+ if (qn->lower != qn->upper) {
+ opt->exb.reach_end = 0;
+ opt->exm.reach_end = 0;
+ }
+ if (qn->lower > 1)
+ opt->exm.reach_end = 0;
+ }
+ }
+
+ min = distance_multiply(nopt.len.min, qn->lower);
+ if (IS_REPEAT_INFINITE(qn->upper))
+ max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+ else
+ max = distance_multiply(nopt.len.max, qn->upper);
+
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_OPTION:
+ {
+ OnigOptionType save = env->options;
+
+ env->options = en->option;
+ r = optimize_node_left(en->target, opt, env);
+ env->options = save;
+ }
+ break;
+
+ case EFFECT_MEMORY:
+ #ifdef USE_SUBEXP_CALL
+ /* a group visited too often only reports its length range */
+ en->opt_count++;
+ if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
+ OnigDistance min, max;
+
+ min = 0;
+ max = ONIG_INFINITE_DISTANCE;
+ if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
+ if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
+ set_mml(&opt->len, min, max);
+ }
+ else
+ #endif
+ {
+ r = optimize_node_left(en->target, opt, env);
+
+ /* the .* anchor is dropped for groups that are back-referenced */
+ if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+ if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
+ remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+ }
+ }
+ break;
+
+ case EFFECT_STOP_BACKTRACK:
+ r = optimize_node_left(en->target, opt, env);
+ break;
+ }
+ }
+ break;
+
+ default:
+ #ifdef ONIG_DEBUG
+ fprintf(stderr, "optimize_node_left: undefined node type %d\n",
+ NTYPE(node));
+ #endif
+ r = ONIGERR_TYPE_BUG;
+ break;
+ }
+
+ return r;
+ }
+
+ /*
+  * Install the exact string *e as the search optimization of `reg`.
+  *
+  * Nothing is done for an empty string.  A case-insensitive string is
+  * stored as ONIG_OPTIMIZE_EXACT_IC.  A case-sensitive one uses the
+  * Boyer-Moore variants when it is at least 3 bytes long (or 2 bytes with
+  * reverse matching allowed), and plain ONIG_OPTIMIZE_EXACT otherwise.
+  *
+  * Returns 0, ONIGERR_MEMORY, or the error reported by set_bm_skip().
+  */
+ static int
+ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
+ {
+   int r;
+
+   if (e->len == 0) return 0;
+
+   if (! e->ignore_case) {
+     int rev_ok;
+
+     reg->exact = k_strdup(e->s, e->s + e->len);
+     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+     reg->exact_end = reg->exact + e->len;
+
+     rev_ok =
+       ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+
+     if (e->len >= 3 || (e->len >= 2 && rev_ok)) {
+       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
+                       reg->map, &(reg->int_map));
+       if (r) return r;
+
+       if (rev_ok != 0)
+         reg->optimize = ONIG_OPTIMIZE_EXACT_BM;
+       else
+         reg->optimize = ONIG_OPTIMIZE_EXACT_BM_NOT_REV;
+     }
+     else {
+       reg->optimize = ONIG_OPTIMIZE_EXACT;
+     }
+   }
+   else {
+     reg->exact = (UChar* )xmalloc(e->len);
+     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+     xmemcpy(reg->exact, e->s, e->len);
+     reg->exact_end = reg->exact + e->len;
+     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
+   }
+
+   reg->dmin = e->mmd.min;
+   reg->dmax = e->mmd.max;
+
+   /* threshold_len is only set when dmin is finite */
+   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
+     reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
+   }
+
+   return 0;
+ }
+
+/*
+ * Install the character-map optimization described by `m` into `reg`:
+ * copies all ONIG_CHAR_TABLE_SIZE map entries, selects ONIG_OPTIMIZE_MAP
+ * and records the distance range.  threshold_len becomes dmin + 1 unless
+ * dmin is ONIG_INFINITE_DISTANCE.
+ */
+static void
+set_optimize_map_info(regex_t* reg, OptMapInfo* m)
+{
+  int idx = 0;
+
+  while (idx < ONIG_CHAR_TABLE_SIZE) {
+    reg->map[idx] = m->map[idx];
+    idx++;
+  }
+
+  reg->optimize = ONIG_OPTIMIZE_MAP;
+  reg->dmin     = m->mmd.min;
+  reg->dmax     = m->mmd.max;
+
+  if (reg->dmin == ONIG_INFINITE_DISTANCE)
+    return;
+
+  reg->threshold_len = reg->dmin + 1;
+}
+
+/*
+ * Merge the line anchors of `anc` into reg->sub_anchor: only
+ * ANCHOR_BEGIN_LINE is taken from the left side and only ANCHOR_END_LINE
+ * from the right side.  Bits already set in reg->sub_anchor are kept.
+ */
+static void
+set_sub_anchor(regex_t* reg, OptAncInfo* anc)
+{
+  reg->sub_anchor |= (anc->left_anchor  & ANCHOR_BEGIN_LINE)
+                   | (anc->right_anchor & ANCHOR_END_LINE);
+}
+
+#ifdef ONIG_DEBUG
+static void print_optimize_info(FILE* f, regex_t* reg);
+#endif
+
+/*
+ * Analyse the parse tree with optimize_node_left() and store the resulting
+ * search hints in `reg`: the global anchors, the anchor distance range
+ * (only when an end-of-buffer anchor is present) and either an exact-string
+ * or a character-map optimization.
+ *
+ * Returns the non-zero result of optimize_node_left() or of
+ * set_optimize_exact_info(); otherwise 0.
+ */
+static int
+set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
+{
+  int r;
+  int have_exact, use_map;
+  NodeOptInfo opt;
+  OptEnv env;
+
+  env.enc        = reg->enc;
+  env.options    = reg->options;
+  env.ambig_flag = reg->ambig_flag;
+  env.scan_env   = scan_env;
+  clear_mml(&env.mmd);
+
+  r = optimize_node_left(node, &opt, &env);
+  if (r) return r;
+
+  reg->anchor  = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
+        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
+  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+
+  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
+    reg->anchor_dmin = opt.len.min;
+    reg->anchor_dmax = opt.len.max;
+  }
+
+  /* Decide between the exact-string and the map strategy. */
+  have_exact = (opt.exb.len > 0 || opt.exm.len > 0);
+  if (have_exact) {
+    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
+    use_map = (opt.map.value > 0 &&
+               comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0);
+  }
+  else {
+    use_map = (opt.map.value > 0);
+  }
+
+  if (use_map) {
+    set_optimize_map_info(reg, &opt.map);
+    set_sub_anchor(reg, &opt.map.anc);
+  }
+  else if (have_exact) {
+    r = set_optimize_exact_info(reg, &opt.exb);
+    set_sub_anchor(reg, &opt.exb.anc);
+  }
+  else {
+    /* No search hint at all: keep only the line anchors of the pattern;
+       the end-line anchor is taken only when the maximum length is 0. */
+    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
+    if (opt.len.max == 0)
+      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
+  }
+
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
+  print_optimize_info(stderr, reg);
+#endif
+  return r;
+}
+
+/*
+ * Reset every optimization field of `reg` to its "no optimization" value
+ * and release the exact-string buffer if one is attached.
+ */
+static void
+clear_optimize_info(regex_t* reg)
+{
+  reg->optimize      = ONIG_OPTIMIZE_NONE;
+  reg->threshold_len = 0;
+
+  reg->anchor      = 0;
+  reg->sub_anchor  = 0;
+  reg->anchor_dmin = 0;
+  reg->anchor_dmax = 0;
+
+  reg->exact_end = (UChar* )NULL;
+  if (IS_NULL(reg->exact))
+    return;
+
+  xfree(reg->exact);
+  reg->exact = (UChar* )NULL;
+}
+
+#ifdef ONIG_DEBUG
+
+/*
+ * Debug helper: write "\nPATTERN: /<pattern>/\n" to `fp`.
+ * When the encoding's minimum character length is greater than one, the
+ * pattern is decoded character by character and code points >= 0x80 are
+ * shown as " 0x%04x "; otherwise the bytes are written unchanged.
+ */
+static void print_enc_string(FILE* fp, OnigEncoding enc,
+                             const UChar *s, const UChar *end)
+{
+  const UChar *cur;
+
+  fprintf(fp, "\nPATTERN: /");
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    for (cur = s; cur < end; cur += enc_len(enc, cur)) {
+      OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, cur, end);
+
+      if (code < 0x80)
+        fputc((int )code, fp);
+      else
+        fprintf(fp, " 0x%04x ", (int )code);
+    }
+  }
+  else {
+    for (cur = s; cur < end; cur++)
+      fputc((int )*cur, fp);
+  }
+
+  fprintf(fp, "/\n");
+}
+
+/*
+ * Debug helper: print the distance range a-b to `f`.  Each bound is shown
+ * as "(n)", or as "inf" when it equals ONIG_INFINITE_DISTANCE.
+ */
+static void
+print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
+{
+  if (a != ONIG_INFINITE_DISTANCE)
+    fprintf(f, "(%u)", a);
+  else
+    fputs("inf", f);
+
+  fputc('-', f);
+
+  if (b != ONIG_INFINITE_DISTANCE)
+    fprintf(f, "(%u)", b);
+  else
+    fputs("inf", f);
+}
+
+/*
+ * Debug helper: print the anchor bits set in `anchor` to `f` as a
+ * comma-separated list enclosed in square brackets, e.g.
+ * "[begin-buf, end-line]".  Bits without an entry in the table are ignored.
+ *
+ * The label for ANCHOR_ANYCHAR_STAR_ML is "anychar-star-ml" so that it
+ * agrees with the constant's name (it used to be printed as
+ * "anychar-star-pl").
+ */
+static void
+print_anchor(FILE* f, int anchor)
+{
+  static const struct {
+    int         flag;
+    const char* label;
+  } names[] = {
+    { ANCHOR_BEGIN_BUF,       "begin-buf"       },
+    { ANCHOR_BEGIN_LINE,      "begin-line"      },
+    { ANCHOR_BEGIN_POSITION,  "begin-pos"       },
+    { ANCHOR_END_BUF,         "end-buf"         },
+    { ANCHOR_SEMI_END_BUF,    "semi-end-buf"    },
+    { ANCHOR_END_LINE,        "end-line"        },
+    { ANCHOR_ANYCHAR_STAR,    "anychar-star"    },
+    { ANCHOR_ANYCHAR_STAR_ML, "anychar-star-ml" }
+  };
+  int i;
+  int printed = 0;
+
+  fprintf(f, "[");
+
+  for (i = 0; i < (int )(sizeof(names) / sizeof(names[0])); i++) {
+    if ((anchor & names[i].flag) == 0) continue;
+
+    if (printed) fprintf(f, ", ");
+    fprintf(f, "%s", names[i].label);
+    printed = 1;
+  }
+
+  fprintf(f, "]");
+}
+
+/*
+ * Debug helper: dump the optimization state of `reg` to `f`: the strategy
+ * name, the anchors (with the anchor distance range when an end-of-buffer
+ * anchor is set), the sub anchors, and then either the exact string or the
+ * set of bytes enabled in the search map.
+ */
+static void
+print_optimize_info(FILE* f, regex_t* reg)
+{
+  /* Indexed by reg->optimize. */
+  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+                              "EXACT_IC", "MAP" };
+
+  fprintf(f, "optimize: %s\n", on[reg->optimize]);
+  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
+  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
+    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
+  fprintf(f, "\n");
+
+  if (reg->optimize) {
+    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
+    fprintf(f, "\n");
+  }
+  fprintf(f, "\n");
+
+  if (reg->exact) {
+    UChar *p;
+    fprintf(f, "exact: [");
+    for (p = reg->exact; p < reg->exact_end; p++) {
+      fputc(*p, f);
+    }
+    /* A pointer difference is not an int; convert it before passing it
+       to %d. */
+    fprintf(f, "]: length: %d\n", (int )(reg->exact_end - reg->exact));
+  }
+  /* reg->optimize holds one strategy value (it indexes on[] above), not a
+     bit mask, so compare for equality instead of testing bits. */
+  else if (reg->optimize == ONIG_OPTIMIZE_MAP) {
+    int c, i, n = 0;
+
+    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+      if (reg->map[i]) n++;
+
+    fprintf(f, "map: n=%d\n", n);
+    if (n > 0) {
+      c = 0;
+      fputc('[', f);
+      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+        if (reg->map[i] != 0) {
+          if (c > 0)  fputs(", ", f);
+          c++;
+          /* Printable single-byte characters are shown as themselves,
+             everything else as its numeric value. */
+          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
+              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
+            fputc(i, f);
+          else
+            fprintf(f, "%d", i);
+        }
+      }
+      fprintf(f, "]\n");
+    }
+  }
+}
+#endif /* ONIG_DEBUG */
+
+
+/*
+ * Release everything `reg` points to (byte code, exact string, both skip
+ * maps, repeat ranges, the chained regexes and, with USE_NAMED_GROUP, the
+ * name table) without freeing the regex_t itself.  The pointers are not
+ * reset afterwards.
+ */
+static void
+onig_free_body(regex_t* reg)
+{
+  if (IS_NOT_NULL(reg->p)) {
+    xfree(reg->p);
+  }
+  if (IS_NOT_NULL(reg->exact)) {
+    xfree(reg->exact);
+  }
+  if (IS_NOT_NULL(reg->int_map)) {
+    xfree(reg->int_map);
+  }
+  if (IS_NOT_NULL(reg->int_map_backward)) {
+    xfree(reg->int_map_backward);
+  }
+  if (IS_NOT_NULL(reg->repeat_range)) {
+    xfree(reg->repeat_range);
+  }
+
+  /* onig_free() releases the chained regex and, through this function,
+     everything linked behind it. */
+  if (IS_NOT_NULL(reg->chain)) {
+    onig_free(reg->chain);
+  }
+
+#ifdef USE_NAMED_GROUP
+  onig_names_free(reg);
+#endif
+}
+
+/*
+ * Destroy a regex object: release its contents with onig_free_body() and
+ * then the regex_t itself.  A NULL `reg` is ignored.
+ */
+extern void
+onig_free(regex_t* reg)
+{
+  if (IS_NULL(reg))
+    return;
+
+  onig_free_body(reg);
+  xfree(reg);
+}
+
+#define REGEX_TRANSFER(to,from) do {\
+ (to)->state = ONIG_STATE_MODIFY;\
+ onig_free_body(to);\
+ xmemcpy(to, from, sizeof(regex_t));\
+ xfree(from);\
+} while (0) /* Move *from into *to: the buffers owned by `to` are released
+ * with onig_free_body(), the whole regex_t is then copied by value (which
+ * also replaces the ONIG_STATE_MODIFY state just stored with the state of
+ * `from`), and finally the `from` struct itself is freed.  Both macro
+ * arguments are evaluated more than once. */
+
+/*
+ * Replace the contents of `to` with those of `from` between
+ * THREAD_ATOMIC_START and THREAD_ATOMIC_END.  The old contents of `to` are
+ * released, the struct is copied by value and the `from` struct is freed,
+ * so `from` must not be used afterwards.
+ */
+extern void
+onig_transfer(regex_t* to, regex_t* from)
+{
+  THREAD_ATOMIC_START;
+
+  /* Same steps as the REGEX_TRANSFER macro, written out. */
+  do {
+    to->state = ONIG_STATE_MODIFY;
+    onig_free_body(to);
+    xmemcpy(to, from, sizeof(regex_t));
+    xfree(from);
+  } while (0);
+
+  THREAD_ATOMIC_END;
+}
+
+#define REGEX_CHAIN_HEAD(reg) do {\
+ while (IS_NOT_NULL((reg)->chain)) {\
+ (reg) = (reg)->chain;\
+ }\
+} while (0) /* Advance `reg` along its chain links until it refers to the
+ * regex whose chain pointer is NULL.  `reg` is assigned to, so it must be
+ * a modifiable pointer variable. */
+
+/*
+ * Append `add` behind the last regex linked from `to`.  The walk and the
+ * link update happen between THREAD_ATOMIC_START and THREAD_ATOMIC_END.
+ */
+extern void
+onig_chain_link_add(regex_t* to, regex_t* add)
+{
+  THREAD_ATOMIC_START;
+
+  /* Find the regex whose chain pointer is still NULL. */
+  for (; IS_NOT_NULL(to->chain); to = to->chain)
+    ;
+  to->chain = add;
+
+  THREAD_ATOMIC_END;
+}
+
+/*
+ * Collapse the chain hanging off `reg`: the last regex of the chain is
+ * unlinked from its predecessor and moved into `reg` with REGEX_TRANSFER.
+ * Nothing happens when `reg` has no chain.
+ */
+extern void
+onig_chain_reduce(regex_t* reg)
+{
+  regex_t *last, *before_last;
+
+  if (IS_NULL(reg->chain))
+    return;
+
+  reg->state = ONIG_STATE_MODIFY;
+
+  before_last = reg;
+  for (last = reg->chain; IS_NOT_NULL(last->chain); last = last->chain)
+    before_last = last;
+
+  /* Detach the last element first, so that releasing the old contents of
+     `reg` during the transfer cannot reach it through the chain. */
+  before_last->chain = (regex_t* )NULL;
+  REGEX_TRANSFER(reg, last);
+}
+
+#if 0
+extern int /* Disabled code (inside #if 0): make a deep copy of `from`
+ * and return it through `to`.  The struct is copied by value and then the
+ * byte code, the exact string and both skip maps are duplicated.
+ * Returns 0 on success, the error from onig_alloc_init(), ONIGERR_MEMORY,
+ * or ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT.
+ * NOTE(review): the code below has visible defects that must be fixed
+ * before it is ever enabled; see the notes on the individual lines. */
+onig_clone(regex_t** to, regex_t* from)
+{
+ int r, size;
+ regex_t* reg;
+
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(from);
+ if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { /* NOTE(review): `reg` is read here before it has been assigned */
+ onig_chain_reduce(from);
+ ONIG_STATE_INC(from);
+ }
+ }
+ else {
+ int n = 0;
+ while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { /* yield until `from` reaches at least ONIG_STATE_NORMAL, giving up after THREAD_PASS_LIMIT_COUNT passes */
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ ONIG_STATE_INC(from);
+ }
+#endif /* USE_MULTI_THREAD_SYSTEM */
+
+ r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ from->enc, ONIG_SYNTAX_DEFAULT);
+ if (r != 0) {
+ ONIG_STATE_DEC(from);
+ return r;
+ }
+
+ xmemcpy(reg, from, sizeof(onig_t)); /* NOTE(review): the rest of this file uses sizeof(regex_t); confirm that onig_t exists and has the same size */
+ reg->chain = (regex_t* )NULL;
+ reg->state = ONIG_STATE_NORMAL;
+
+ if (from->p) {
+ reg->p = (UChar* )xmalloc(reg->alloc);
+ if (IS_NULL(reg->p)) goto mem_error;
+ xmemcpy(reg->p, from->p, reg->alloc);
+ }
+
+ if (from->exact) {
+ reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
+ if (IS_NULL(reg->exact)) goto mem_error;
+ reg->exact_end = reg->exact + (from->exact_end - from->exact);
+ xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
+ }
+
+ if (from->int_map) {
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+ reg->int_map = (int* )xmalloc(size);
+ if (IS_NULL(reg->int_map)) goto mem_error;
+ xmemcpy(reg->int_map, from->int_map, size);
+ }
+
+ if (from->int_map_backward) {
+ size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+ reg->int_map_backward = (int* )xmalloc(size);
+ if (IS_NULL(reg->int_map_backward)) goto mem_error;
+ xmemcpy(reg->int_map_backward, from->int_map_backward, size);
+ }
+
+#ifdef USE_NAMED_GROUP
+ reg->name_table = names_clone(from); /* names_clone is not implemented */
+#endif
+
+ ONIG_STATE_DEC(from);
+ *to = reg;
+ return 0;
+
+ mem_error: /* NOTE(review): `reg` and the buffers already duplicated are not released on this path, and after the struct copy any field not yet duplicated still points at memory owned by `from` */
+ ONIG_STATE_DEC(from);
+ return ONIGERR_MEMORY;
+}
+#endif
+
+#ifdef ONIG_DEBUG
+static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
+#endif
+#ifdef ONIG_DEBUG_PARSE_TREE
+static void print_tree P_((FILE* f, Node* node));
+#endif
+
+/*
+ * Compile the pattern `pattern` .. `pattern_end` into the byte code of
+ * `reg`.
+ *
+ * Steps: parse the pattern into a node tree, resolve named/numbered group
+ * usage and sub-expression calls, run setup_tree(), derive the search
+ * optimization info, emit the byte code and terminate it with OP_END.
+ *
+ * Returns 0 on success or a non-zero error code.  When the parser recorded
+ * an error position and `einfo` is not NULL, `einfo` receives the encoding
+ * and the offending range of the pattern.
+ *
+ * Note: reg->state is set back to ONIG_STATE_NORMAL only on the paths that
+ * reach the `end` label; the `err` / `err_unset` paths return with the
+ * state still ONIG_STATE_COMPILING.
+ */
+extern int
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+             OnigErrorInfo* einfo)
+{
+#define COMPILE_INIT_SIZE  20
+
+  int r, init_size;
+  Node*  root;
+  ScanEnv  scan_env;
+#ifdef USE_SUBEXP_CALL
+  UnsetAddrList  uslist;
+#endif
+
+  reg->state = ONIG_STATE_COMPILING;
+
+#ifdef ONIG_DEBUG
+  print_enc_string(stderr, reg->enc, pattern, pattern_end);
+#endif
+
+  /* Allocate the code buffer on first use, otherwise just rewind it. */
+  if (reg->alloc == 0) {
+    init_size = (pattern_end - pattern) * 2;
+    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
+    r = BBUF_INIT(reg, init_size);
+    if (r != 0) goto end;
+  }
+  else
+    reg->used = 0;
+
+  reg->num_mem            = 0;
+  reg->num_repeat         = 0;
+  reg->num_null_check     = 0;
+  reg->repeat_range_alloc = 0;
+  reg->repeat_range       = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  reg->num_comb_exp_check = 0;
+#endif
+
+  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
+  if (r != 0) goto err;
+
+#ifdef USE_NAMED_GROUP
+  /* mixed use named group and no-named group */
+  if (scan_env.num_named > 0 &&
+      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+    if (scan_env.num_named != scan_env.num_mem)
+      r = disable_noname_group_capture(&root, reg, &scan_env);
+    else
+      r = numbered_ref_check(root);
+
+    if (r != 0) goto err;
+  }
+#endif
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+  print_tree(stderr, root);
+#endif
+
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    r = unset_addr_list_init(&uslist, scan_env.num_call);
+    if (r != 0) goto err;
+    scan_env.unset_addr_list = &uslist;
+    r = setup_subexp_call(root, &scan_env);
+    if (r != 0) goto err_unset;
+    r = subexp_recursive_check_trav(root, &scan_env);
+    if (r  < 0) goto err_unset;
+    r = subexp_inf_recursive_check_trav(root, &scan_env);
+    if (r != 0) goto err_unset;
+
+    reg->num_call = scan_env.num_call;
+  }
+  else
+    reg->num_call = 0;
+#endif
+
+  r = setup_tree(root, reg, 0, &scan_env);
+  if (r != 0) goto err_unset;
+
+  reg->capture_history = scan_env.capture_history;
+  reg->bt_mem_start    = scan_env.bt_mem_start;
+  reg->bt_mem_start   |= reg->capture_history;
+  if (IS_FIND_CONDITION(reg->options))
+    BIT_STATUS_ON_ALL(reg->bt_mem_end);
+  else {
+    reg->bt_mem_end  = scan_env.bt_mem_end;
+    reg->bt_mem_end |= reg->capture_history;
+  }
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+      || scan_env.num_call == 0
+#endif
+      ) {
+    setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.has_recursion != 0) {
+      scan_env.num_comb_exp_check = 0;
+    }
+    else
+#endif
+    if (scan_env.comb_exp_max_regnum > 0) {
+      int i;
+      /* The check count is dropped to 0 as soon as one of the groups up
+         to comb_exp_max_regnum is back-referenced. */
+      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+        if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+          scan_env.num_comb_exp_check = 0;
+          break;
+        }
+      }
+    }
+  }
+
+  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
+  clear_optimize_info(reg);
+#ifndef ONIG_DONT_OPTIMIZE
+  r = set_optimize_info_from_tree(root, reg, &scan_env);
+  if (r != 0) goto err_unset;
+#endif
+
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
+    xfree(scan_env.mem_nodes_dynamic);
+    scan_env.mem_nodes_dynamic = (Node** )NULL;
+  }
+
+  r = compile_tree(root, reg);
+  if (r == 0) {
+    r = add_opcode(reg, OP_END);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.num_call > 0) {
+      /* Resolve the call addresses only when OP_END was appended;
+         otherwise the add_opcode() error would be overwritten by the
+         result of unset_addr_list_fix() and could be lost. */
+      if (r == 0)
+        r = unset_addr_list_fix(&uslist, reg);
+      unset_addr_list_end(&uslist);
+      if (r) goto err;
+    }
+#endif
+
+    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
+      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
+    else {
+      if (reg->bt_mem_start != 0)
+        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
+      else
+        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
+    }
+  }
+#ifdef USE_SUBEXP_CALL
+  else if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+  onig_node_free(root);
+
+#ifdef ONIG_DEBUG_COMPILE
+#ifdef USE_NAMED_GROUP
+  onig_print_names(stderr, reg);
+#endif
+  print_compiled_byte_code_list(stderr, reg);
+#endif
+
+ end:
+  reg->state = ONIG_STATE_NORMAL;
+  return r;
+
+ err_unset:
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+ err:
+  if (IS_NOT_NULL(scan_env.error)) {
+    if (IS_NOT_NULL(einfo)) {
+      einfo->enc     = scan_env.enc;
+      einfo->par     = scan_env.error;
+      einfo->par_end = scan_env.error_end;
+    }
+  }
+
+  if (IS_NOT_NULL(root)) onig_node_free(root);
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
+    xfree(scan_env.mem_nodes_dynamic);
+  return r;
+}
+
+#ifdef USE_RECOMPILE_API
+/*
+ * Build a new regex from the given pattern and settings and hand it to
+ * `reg`: when `reg` is in ONIG_STATE_NORMAL the new regex is transferred
+ * into it at once, otherwise it is appended to the chain of `reg`.
+ * Returns the non-zero result of onig_new() on failure (in which case
+ * `reg` is left untouched), else 0.
+ */
+extern int
+onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+               OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+               OnigErrorInfo* einfo)
+{
+  regex_t *fresh;
+  int status;
+
+  status = onig_new(&fresh, pattern, pattern_end, option, enc, syntax, einfo);
+  if (status)
+    return status;
+
+  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
+    onig_chain_link_add(reg, fresh);
+  else
+    onig_transfer(reg, fresh);
+
+  return 0;
+}
+#endif
+
+static int onig_inited = 0;
+
+/*
+ * Allocate a regex_t and initialize the fields that do not depend on a
+ * pattern.  The library is initialized with onig_init() on first use.
+ *
+ * `option` is merged with syntax->options; when
+ * ONIG_OPTION_NEGATE_SINGLELINE is given, ONIG_OPTION_SINGLELINE is
+ * cleared from the merged value.  `ambig_flag` is masked with the flags
+ * the encoding supports.
+ *
+ * Returns 0 and stores the new object in *reg, or
+ * ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED for an undefined encoding,
+ * ONIGERR_INVALID_COMBINATION_OF_OPTIONS when both capture-group options
+ * are requested, ONIGERR_MEMORY when the allocation fails.
+ */
+extern int
+onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
+                OnigEncoding enc, OnigSyntaxType* syntax)
+{
+  if (! onig_inited)
+    onig_init();
+
+  if (ONIGENC_IS_UNDEF(enc))
+    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+
+  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+  }
+
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+  (*reg)->state = ONIG_STATE_MODIFY;
+
+  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
+    option |= syntax->options;
+    option &= ~ONIG_OPTION_SINGLELINE;
+  }
+  else
+    option |= syntax->options;
+
+  (*reg)->enc              = enc;
+  (*reg)->options          = option;
+  (*reg)->syntax           = syntax;
+  (*reg)->optimize         = 0;
+  (*reg)->exact            = (UChar* )NULL;
+  (*reg)->int_map          = (int* )NULL;
+  (*reg)->int_map_backward = (int* )NULL;
+  (*reg)->chain            = (regex_t* )NULL;
+
+  (*reg)->p                = (UChar* )NULL;
+  (*reg)->alloc            = 0;
+  (*reg)->used             = 0;
+  (*reg)->name_table       = (void* )NULL;
+
+  /* onig_free_body() frees repeat_range when it is not NULL, so it must
+     hold a defined value even if the regex is freed without ever having
+     been compiled. */
+  (*reg)->repeat_range       = (OnigRepeatRange* )NULL;
+  (*reg)->repeat_range_alloc = 0;
+
+  (*reg)->ambig_flag       = ambig_flag;
+  (*reg)->ambig_flag      &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
+
+  return 0;
+}
+
+/*
+ * Create and compile a regex in one step.  On success *reg holds the new
+ * object and 0 is returned.  If onig_alloc_init() fails its error code is
+ * returned; if compilation fails the object is freed, *reg is set to NULL
+ * and the compile error is returned.  When `einfo` is given, its `par`
+ * field is cleared before anything else happens.
+ */
+extern int
+onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+         OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+         OnigErrorInfo* einfo)
+{
+  int status;
+
+  if (IS_NOT_NULL(einfo))
+    einfo->par = (UChar* )NULL;
+
+  status = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+                           enc, syntax);
+  if (status)
+    return status;
+
+  status = onig_compile(*reg, pattern, pattern_end, einfo);
+  if (! status)
+    return status;
+
+  onig_free(*reg);
+  *reg = NULL;
+  return status;
+}
+
+/*
+ * One-time library initialization; calls made while the library is
+ * already initialized do nothing.  Always returns 0.
+ */
+extern int
+onig_init(void)
+{
+  if (onig_inited == 0) {
+    /* The flag is raised before the initialization work starts. */
+    onig_inited = 1;
+
+    THREAD_SYSTEM_INIT;
+    THREAD_ATOMIC_START;
+
+    onigenc_init();
+    onigenc_set_default_caseconv_table((UChar* )0);
+
+#ifdef ONIG_DEBUG_STATISTICS
+    onig_statistics_init();
+#endif
+
+    THREAD_ATOMIC_END;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Library shutdown: prints the statistics (ONIG_DEBUG_STATISTICS), frees
+ * the shared character-class table (USE_SHARED_CCLASS_TABLE) and the node
+ * list (USE_RECYCLE_NODE) when those features are compiled in, then clears
+ * the "initialized" flag.  Always returns 0.
+ */
+extern int
+onig_end(void)
+{
+  extern int onig_free_shared_cclass_table(void);
+
+  THREAD_ATOMIC_START;
+
+#ifdef ONIG_DEBUG_STATISTICS
+  onig_print_statistics(stderr);
+#endif
+
+#ifdef USE_SHARED_CCLASS_TABLE
+  onig_free_shared_cclass_table();
+#endif
+
+#ifdef USE_RECYCLE_NODE
+  onig_free_node_list();
+#endif
+
+  /* From here on onig_init() will run the initialization again. */
+  onig_inited = 0;
+
+  THREAD_ATOMIC_END;
+  THREAD_SYSTEM_END;
+
+  return 0;
+}
+
+
+#ifdef ONIG_DEBUG
+
+/* arguments type */
+#define ARG_SPECIAL -1 /* operands are decoded per opcode by the printer */
+#define ARG_NON 0 /* no operand */
+#define ARG_RELADDR 1 /* one relative address */
+#define ARG_ABSADDR 2 /* one absolute address */
+#define ARG_LENGTH 3 /* one length value */
+#define ARG_MEMNUM 4 /* one MemNumType value */
+#define ARG_OPTION 5 /* one OnigOptionType value */
+#define ARG_STATE_CHECK 6 /* one StateCheckNumType value */
+
+OnigOpInfoType OnigOpInfo[] = { /* opcode, printable name, operand kind (ARG_*); terminated by the entry with opcode -1 */
+ { OP_FINISH, "finish", ARG_NON },
+ { OP_END, "end", ARG_NON },
+ { OP_EXACT1, "exact1", ARG_SPECIAL },
+ { OP_EXACT2, "exact2", ARG_SPECIAL },
+ { OP_EXACT3, "exact3", ARG_SPECIAL },
+ { OP_EXACT4, "exact4", ARG_SPECIAL },
+ { OP_EXACT5, "exact5", ARG_SPECIAL },
+ { OP_EXACTN, "exactn", ARG_SPECIAL },
+ { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
+ { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
+ { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
+ { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
+ { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
+ { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
+ { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
+ { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
+ { OP_CCLASS, "cclass", ARG_SPECIAL },
+ { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
+ { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
+ { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
+ { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
+ { OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
+ { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
+ { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
+ { -1, "", ARG_NON } /* end-of-table sentinel: the lookups stop at the first negative opcode */
+};
+
+/*
+ * Look up the printable name of `opcode` in OnigOpInfo.
+ * Returns "" when the opcode is not listed.
+ */
+static char*
+op2name(int opcode)
+{
+  int i = 0;
+
+  while (OnigOpInfo[i].opcode >= 0) {
+    if (OnigOpInfo[i].opcode == opcode)
+      return OnigOpInfo[i].name;
+    i++;
+  }
+
+  return "";
+}
+
+/*
+ * Look up the operand kind (ARG_*) of `opcode` in OnigOpInfo.
+ * Opcodes that are not listed are reported as ARG_SPECIAL.
+ */
+static int
+op2arg_type(int opcode)
+{
+  int i = 0;
+
+  while (OnigOpInfo[i].opcode >= 0) {
+    if (OnigOpInfo[i].opcode == opcode)
+      return OnigOpInfo[i].arg_type;
+    i++;
+  }
+
+  return ARG_SPECIAL;
+}
+
+/* Write `indent` space characters to `f` (nothing when indent <= 0). */
+static void
+Indent(FILE* f, int indent)
+{
+  int remaining = indent;
+
+  while (remaining > 0) {
+    putc(' ', f);
+    remaining--;
+  }
+}
+
+/* Write ':' followed by the first `len` bytes of `s` to `f`. */
+static void
+p_string(FILE* f, int len, UChar* s)
+{
+  int i;
+
+  fputc(':', f);
+  for (i = 0; i < len; i++)
+    fputc(s[i], f);
+}
+
+/*
+ * Write ":<len>:" followed by len * mb_len bytes of `s` to `f`
+ * (`len` characters of `mb_len` bytes each).
+ */
+static void
+p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
+{
+  int remaining;
+
+  fprintf(f, ":%d:", len);
+
+  for (remaining = len * mb_len; remaining > 0; remaining--)
+    fputc(*s++, f);
+}
+
+extern void /* Debug helper: print the single instruction at `bp` to `f`
+ * as "[name...]" together with its operands.  When `nextp` is not NULL it
+ * receives the address just behind the printed instruction.  `enc` is used
+ * only to measure the character of OP_EXACT1_IC. */
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
+ OnigEncoding enc)
+{
+ int i, n, arg_type;
+ RelAddrType addr;
+ LengthType len;
+ MemNumType mem;
+ StateCheckNumType scn;
+ OnigCodePoint code;
+ UChar *q;
+
+ fprintf(f, "[%s", op2name(*bp));
+ arg_type = op2arg_type(*bp);
+ if (arg_type != ARG_SPECIAL) { /* uniform operand layout: step over the opcode byte and decode by operand kind */
+ bp++;
+ switch (arg_type) {
+ case ARG_NON:
+ break;
+ case ARG_RELADDR:
+ GET_RELADDR_INC(addr, bp);
+ fprintf(f, ":(%d)", addr);
+ break;
+ case ARG_ABSADDR:
+ GET_ABSADDR_INC(addr, bp);
+ fprintf(f, ":(%d)", addr);
+ break;
+ case ARG_LENGTH:
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d", len);
+ break;
+ case ARG_MEMNUM:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+ case ARG_OPTION:
+ {
+ OnigOptionType option = *((OnigOptionType* )bp);
+ bp += SIZE_OPTION;
+ fprintf(f, ":%d", option);
+ }
+ break;
+
+ case ARG_STATE_CHECK:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ break;
+ }
+ }
+ else { /* opcode-specific operand layout; the switch consumes the opcode byte */
+ switch (*bp++) {
+ case OP_EXACT1:
+ case OP_ANYCHAR_STAR_PEEK_NEXT:
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+ p_string(f, 1, bp++); break;
+ case OP_EXACT2:
+ p_string(f, 2, bp); bp += 2; break;
+ case OP_EXACT3:
+ p_string(f, 3, bp); bp += 3; break;
+ case OP_EXACT4:
+ p_string(f, 4, bp); bp += 4; break;
+ case OP_EXACT5:
+ p_string(f, 5, bp); bp += 5; break;
+ case OP_EXACTN:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 1, bp);
+ bp += len;
+ break;
+
+ case OP_EXACTMB2N1:
+ p_string(f, 2, bp); bp += 2; break;
+ case OP_EXACTMB2N2:
+ p_string(f, 4, bp); bp += 4; break;
+ case OP_EXACTMB2N3:
+ p_string(f, 6, bp); bp += 6; break;
+ case OP_EXACTMB2N:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 2, bp);
+ bp += len * 2;
+ break;
+ case OP_EXACTMB3N:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 3, bp);
+ bp += len * 3;
+ break;
+ case OP_EXACTMBN:
+ {
+ int mb_len;
+
+ GET_LENGTH_INC(mb_len, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:%d:", mb_len, len);
+ n = len * mb_len;
+ while (n-- > 0) { fputc(*bp++, f); }
+ }
+ break;
+
+ case OP_EXACT1_IC:
+ len = enc_len(enc, bp);
+ p_string(f, len, bp);
+ bp += len;
+ break;
+ case OP_EXACTN_IC:
+ GET_LENGTH_INC(len, bp);
+ p_len_string(f, len, 1, bp);
+ bp += len;
+ break;
+
+ case OP_CCLASS:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ fprintf(f, ":%d", n);
+ break;
+
+ case OP_CCLASS_NOT:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ fprintf(f, ":%d", n);
+ break;
+
+ case OP_CCLASS_MB:
+ case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, bp);
+ q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+ ALIGNMENT_RIGHT(q);
+#endif
+ GET_CODE_POINT(code, q);
+ bp += len;
+ fprintf(f, ":%d:%d", (int )code, len);
+ break;
+
+ case OP_CCLASS_MIX:
+ case OP_CCLASS_MIX_NOT:
+ n = bitset_on_num((BitSetRef )bp);
+ bp += SIZE_BITSET;
+ GET_LENGTH_INC(len, bp);
+ q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+ ALIGNMENT_RIGHT(q);
+#endif
+ GET_CODE_POINT(code, q);
+ bp += len;
+ fprintf(f, ":%d:%d:%d", n, (int )code, len);
+ break;
+
+ case OP_CCLASS_NODE:
+ {
+ CClassNode *cc;
+
+ GET_POINTER_INC(cc, bp);
+ n = bitset_on_num(cc->bs);
+ fprintf(f, ":%u:%d", (unsigned int )cc, n); /* NOTE(review): the pointer is cast to unsigned int, which loses bits wherever pointers are wider than unsigned int */
+ }
+ break;
+
+ case OP_BACKREFN_IC:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+
+ case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_MULTI:
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ break;
+
+ case OP_BACKREF_AT_LEVEL:
+ {
+ OnigOptionType option;
+ LengthType level;
+
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ }
+ break;
+
+ case OP_REPEAT:
+ case OP_REPEAT_NG:
+ {
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:%d", mem, addr);
+ }
+ break;
+
+ case OP_PUSH_OR_JUMP_EXACT1:
+ case OP_PUSH_IF_PEEK_NEXT:
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":(%d)", addr);
+ p_string(f, 1, bp);
+ bp += 1;
+ break;
+
+ case OP_LOOK_BEHIND:
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d", len);
+ break;
+
+ case OP_PUSH_LOOK_BEHIND_NOT:
+ GET_RELADDR_INC(addr, bp);
+ GET_LENGTH_INC(len, bp);
+ fprintf(f, ":%d:(%d)", len, addr);
+ break;
+
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:(%d)", scn, addr);
+ break;
+
+ default:
+ fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
+ *--bp); /* this message goes to stderr, not to `f`; bp is moved back onto the unknown opcode, so *nextp does not advance in this case */
+ }
+ }
+ fputs("]", f);
+ if (nextp) *nextp = bp;
+}
+
+/*
+ * Debug helper: print the byte code length of `reg` followed by every
+ * instruction.  Instructions are separated by a space, or by a newline
+ * before each instruction whose 1-based position is a multiple of 5.
+ */
+static void
+print_compiled_byte_code_list(FILE* f, regex_t* reg)
+{
+  UChar* bp  = reg->p;
+  UChar* end = reg->p + reg->used;
+  int ncode;
+
+  fprintf(f, "code length: %d\n", reg->used);
+
+  for (ncode = 1; bp < end; ncode++) {
+    if (bp > reg->p)
+      fputs((ncode % 5 == 0) ? "\n" : " ", f);
+
+    /* The printer moves bp on to the next instruction. */
+    onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
+  }
+
+  fputs("\n", f);
+}
+
+/*
+ * Debug helper: print the parse tree rooted at `node` to `f`, one node per
+ * line, indenting children by three more columns than their parent.
+ * A NULL node, a malformed list/alt chain or an unknown ctype is reported
+ * and terminates the process with exit(0).
+ *
+ * An option effect prints its target subtree once; it used to print the
+ * subtree inside the EFFECT_OPTION case and again in the code shared by
+ * all effect types.
+ */
+static void
+print_indent_tree(FILE* f, Node* node, int indent)
+{
+  int i, type;
+  int add = 3;
+  UChar* p;
+
+  Indent(f, indent);
+  if (IS_NULL(node)) {
+    fprintf(f, "ERROR: null node!!!\n");
+    exit (0);
+  }
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+  case N_ALT:
+    if (NTYPE(node) == N_LIST)
+      fprintf(f, "<list:%x>\n", (int )node);
+    else
+      fprintf(f, "<alt:%x>\n", (int )node);
+
+    print_indent_tree(f, NCONS(node).left, indent + add);
+    /* Walk the right spine; every cell must have the same type. */
+    while (IS_NOT_NULL(node = NCONS(node).right)) {
+      if (NTYPE(node) != type) {
+        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+        exit(0);
+      }
+      print_indent_tree(f, NCONS(node).left, indent + add);
+    }
+    break;
+
+  case N_STRING:
+    fprintf(f, "<string%s:%x>",
+            (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
+    for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
+      if (*p >= 0x20 && *p < 0x7f)
+        fputc(*p, f);
+      else {
+        fprintf(f, " 0x%02x", *p);
+      }
+    }
+    break;
+
+  case N_CCLASS:
+    fprintf(f, "<cclass:%x>", (int )node);
+    if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
+    if (NCCLASS(node).mbuf) {
+      BBuf* bbuf = NCCLASS(node).mbuf;
+      for (i = 0; i < bbuf->used; i++) {
+        if (i > 0) fprintf(f, ",");
+        fprintf(f, "%0x", bbuf->p[i]);
+      }
+    }
+    break;
+
+  case N_CTYPE:
+    fprintf(f, "<ctype:%x> ", (int )node);
+    switch (NCTYPE(node).type) {
+    case CTYPE_WORD:     fputs("word",     f); break;
+    case CTYPE_NOT_WORD: fputs("not word", f); break;
+    default:
+      fprintf(f, "ERROR: undefined ctype.\n");
+      exit(0);
+    }
+    break;
+
+  case N_ANYCHAR:
+    fprintf(f, "<anychar:%x>", (int )node);
+    break;
+
+  case N_ANCHOR:
+    fprintf(f, "<anchor:%x> ", (int )node);
+    switch (NANCHOR(node).type) {
+    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
+    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
+    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
+    case ANCHOR_END_LINE:       fputs("end line",       f); break;
+    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
+    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
+
+    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
+    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
+#ifdef USE_WORD_BEGIN_END
+    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
+    case ANCHOR_WORD_END:        fputs("word end", f);       break;
+#endif
+    case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
+    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
+    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
+    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
+
+    default:
+      fprintf(f, "ERROR: undefined anchor type.\n");
+      break;
+    }
+    break;
+
+  case N_BACKREF:
+    {
+      int* p;
+      BackrefNode* br = &(NBACKREF(node));
+      p = BACKREFS_P(br);
+      fprintf(f, "<backref:%x>", (int )node);
+      for (i = 0; i < br->back_num; i++) {
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", p[i]);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    {
+      CallNode* cn = &(NCALL(node));
+      fprintf(f, "<call:%x>", (int )node);
+      p_string(f, cn->name_end - cn->name, cn->name);
+    }
+    break;
+#endif
+
+  case N_QUANTIFIER:
+    fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
+            NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
+            (NQUANTIFIER(node).greedy ? "" : "?"));
+    print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
+    break;
+
+  case N_EFFECT:
+    fprintf(f, "<effect:%x> ", (int )node);
+    switch (NEFFECT(node).type) {
+    case EFFECT_OPTION:
+      /* Only the label is printed here; the newline and the target
+         subtree are emitted once, below, for every effect type. */
+      fprintf(f, "option:%d", NEFFECT(node).option);
+      break;
+    case EFFECT_MEMORY:
+      fprintf(f, "memory:%d", NEFFECT(node).regnum);
+      break;
+    case EFFECT_STOP_BACKTRACK:
+      fprintf(f, "stop-bt");
+      break;
+
+    default:
+      break;
+    }
+    fprintf(f, "\n");
+    print_indent_tree(f, NEFFECT(node).target, indent + add);
+    break;
+
+  default:
+    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
+    break;
+  }
+
+  /* The container node types above already ended their own line. */
+  if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
+      type != N_EFFECT)
+    fprintf(f, "\n");
+  fflush(f);
+}
+#endif /* ONIG_DEBUG */
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+/* Debug helper: print the whole parse tree of `node` to `f`, starting at
+   indentation 0. */
+static void
+print_tree(FILE* f, Node* node)
+{
+  const int top_level_indent = 0;
+
+  print_indent_tree(f, node, top_level_indent);
+}
+#endif
diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c
new file mode 100644
index 0000000..958917e
--- /dev/null
+++ b/ext/mbstring/oniguruma/regenc.c
@@ -0,0 +1,1028 @@
+/**********************************************************************
+ regenc.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+/* Global default character encoding.  Returned by
+ * onigenc_get_default_encoding() and replaced by
+ * onigenc_set_default_encoding(); initialised to
+ * ONIG_ENCODING_INIT_DEFAULT. */
+OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
+
+/* Encoding-layer initialisation hook.
+ * Performs no work in this build and always returns 0. */
+extern int
+onigenc_init(void)
+{
+  const int status = 0;
+
+  return status;
+}
+
+/* Return the current value of the global OnigEncDefaultCharEncoding. */
+extern OnigEncoding
+onigenc_get_default_encoding(void)
+{
+  OnigEncoding current = OnigEncDefaultCharEncoding;
+
+  return current;
+}
+
+/* Replace the global default encoding with `enc`.
+ * The argument is stored as given (no validation); always returns 0. */
+extern int
+onigenc_set_default_encoding(OnigEncoding enc)
+{
+  const int status = 0;
+
+  OnigEncDefaultCharEncoding = enc;
+  return status;
+}
+
+/* Adjust `s` to a character head at or to the right of it.
+ *
+ * ONIGENC_LEFT_ADJUST_CHAR_HEAD() is applied first; if its result lies
+ * strictly before `s`, the result is advanced by enc_len() of that
+ * position (presumably the byte length of the character starting
+ * there -- confirm against regenc.h).  Otherwise the left-adjusted
+ * pointer is returned unchanged. */
+extern UChar*
+onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  if (head >= s)
+    return head;
+
+  return head + enc_len(enc, head);
+}
+
+/* Same adjustment as onigenc_get_right_adjust_char_head(), additionally
+ * reporting a "previous" position through `prev` (ignored when null).
+ *
+ * When the left-adjusted head is strictly before `s`, *prev receives
+ * that head and the function returns the position enc_len() bytes
+ * further on.  Otherwise *prev is set to NULL (the previous character
+ * is not computed in that case) and the head itself is returned. */
+extern UChar*
+onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
+    const UChar* start, const UChar* s, const UChar** prev)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+  const int inside_char = (head < s);
+
+  if (prev)
+    *prev = inside_char ? (const UChar* )head : (const UChar* )NULL;
+
+  if (inside_char)
+    head += enc_len(enc, head);
+
+  return head;
+}
+
+/* Return the left-adjusted character head for the byte just before
+ * `s`, or NULL when `s` is at or before `start` (nothing precedes it). */
+extern UChar*
+onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+{
+  if (s > start)
+    return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+
+  return (UChar* )NULL;
+}
+
+/* Move `s` backwards up to `n` times, each time to the left-adjusted
+ * head of the byte preceding the current position.
+ *
+ * Returns NULL if `start` is reached while steps remain, or if a step
+ * itself yields a null pointer; otherwise the final position.  With
+ * n <= 0 the input pointer is returned as is. */
+extern UChar*
+onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
+{
+  for (; ONIG_IS_NOT_NULL(s) && n > 0; n--) {
+    if (s <= start)
+      return (UChar* )NULL;
+
+    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
+  }
+
+  return (UChar* )s;
+}
+
+/* Advance `n` characters forward from `p` without going past `end`.
+ *
+ * Each step adds ONIGENC_MBC_ENC_LEN(enc, q) to the cursor.  Returns the
+ * resulting position when it is <= end, otherwise NULL.  With n <= 0 the
+ * result is `p` itself (or NULL if `p` is already beyond `end`).
+ *
+ * The cursor is checked against `end` before every step: once the input
+ * is exhausted while steps remain the answer is NULL, and bailing out
+ * immediately avoids decoding bytes outside [p, end) and building
+ * pointers further and further past the buffer. */
+extern UChar*
+onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
+{
+  UChar* q = (UChar* )p;
+
+  while (n-- > 0) {
+    if (q >= end)
+      return NULL;          /* nothing left to step over */
+
+    q += ONIGENC_MBC_ENC_LEN(enc, q);
+  }
+
+  /* the last character may itself be truncated by `end` */
+  return (q <= end ? q : NULL);
+}
+
+/* Count the characters in [p, end): the cursor is advanced by
+ * ONIGENC_MBC_ENC_LEN() per character until it reaches or passes `end`,
+ * and the number of advances is returned. */
+extern int
+onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+  UChar* cur;
+  int count = 0;
+
+  for (cur = (UChar* )p; cur < end; cur += ONIGENC_MBC_ENC_LEN(enc, cur))
+    count++;
+
+  return count;
+}
+
+/* Count the characters of a terminator-delimited string.
+ *
+ * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes
+ * beginning at a character boundary; a single zero byte suffices when
+ * that minimum length is 1.  A zero byte that is not followed by enough
+ * further zero bytes is treated as the start of an ordinary character.
+ * There is no length bound: the caller must supply a properly
+ * terminated buffer. */
+extern int
+onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+{
+  int count = 0;
+  UChar* cur = (UChar* )s;
+
+  for (;;) {
+    if (*cur == '\0') {
+      int remaining = ONIGENC_MBC_MINLEN(enc);
+      UChar* probe = cur + 1;
+
+      /* consume one following zero byte per unit still required */
+      for (; remaining > 1 && *probe == '\0'; probe++)
+        remaining--;
+
+      if (remaining == 1)
+        return count;
+    }
+
+    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
+    count++;
+  }
+}
+
+/* Return the byte length of a terminator-delimited string, i.e. the
+ * offset from `s` to the start of its terminator.
+ *
+ * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes
+ * beginning at a character boundary (one zero byte when that minimum
+ * is 1).  There is no length bound: the caller must supply a properly
+ * terminated buffer. */
+extern int
+onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+{
+  UChar* const origin = (UChar* )s;
+  UChar* cur = origin;
+
+  for (;;) {
+    if (*cur == '\0') {
+      int remaining = ONIGENC_MBC_MINLEN(enc);
+      UChar* probe = cur + 1;
+
+      /* consume one following zero byte per unit still required */
+      for (; remaining > 1 && *probe == '\0'; probe++)
+        remaining--;
+
+      if (remaining == 1)
+        return (int )(cur - origin);
+    }
+
+    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
+  }
+}
+
+#ifndef ONIG_RUBY_M17N
+
+#ifndef NOT_RUBY
+
+#define USE_APPLICATION_TO_LOWER_CASE_TABLE
+
+/* Character-type flag words for the 256 code values 0x00-0xff, one
+ * unsigned short per value, laid out eight entries per row in ascending
+ * order.  Unlike OnigEncAsciiCtypeTable, the upper half (0x80-0xff)
+ * carries non-zero flags.  The meaning of the individual bits is not
+ * defined in this file -- presumably the ONIGENC_CTYPE_* masks;
+ * confirm against the encoding headers before editing any value. */
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+#endif
+
+/* Pointer to the lower-casing table currently in effect.  It starts out
+ * null and is assigned by onigenc_set_default_caseconv_table(). */
+const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
+
+#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
+/* Built-in byte -> lower-case mapping, compiled only when
+ * USE_APPLICATION_TO_LOWER_CASE_TABLE is not defined.  Entries are in
+ * octal, eight per row, indexed by byte value: 'A'-'Z' (\101-\132) map
+ * to 'a'-'z' (\141-\172); every other byte maps to itself. */
+static const UChar BuiltInAsciiToLowerCaseTable[] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
+
+#ifdef USE_UPPER_CASE_TABLE
+/* Byte -> upper-case mapping for ASCII letters, compiled only when
+ * USE_UPPER_CASE_TABLE is defined.  Entries are in octal, eight per
+ * row, indexed by byte value: 'a'-'z' (\141-\172) map to 'A'-'Z'
+ * (\101-\132); every other byte maps to itself. */
+const UChar OnigEncAsciiToUpperCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif
+
+/* Character-type flag words for byte values 0x00-0xff, one unsigned
+ * short per value, eight entries per row.  Only the ASCII half
+ * (0x00-0x7f) carries flags; every entry for 0x80-0xff is zero.  The
+ * meaning of the individual bits is not defined in this file --
+ * presumably the ONIGENC_CTYPE_* masks; confirm before editing. */
+const unsigned short OnigEncAsciiCtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/* Byte -> lower-case mapping for ISO-8859-1, in octal, eight entries
+ * per row, indexed by byte value.  'A'-'Z' (\101-\132) map to 'a'-'z'
+ * (\141-\172) and \300-\336 map to \340-\376, except that \327 maps to
+ * itself.  \337 and all remaining bytes also map to themselves. */
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+#ifdef USE_UPPER_CASE_TABLE
+/* Byte -> upper-case mapping for ISO-8859-1, compiled only when
+ * USE_UPPER_CASE_TABLE is defined.  Octal, eight entries per row,
+ * indexed by byte value.  'a'-'z' (\141-\172) map to 'A'-'Z'
+ * (\101-\132) and \340-\376 map to \300-\336, except that \367 maps to
+ * itself.  \377 and all remaining bytes also map to themselves. */
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
+};
+#endif
+
+/* Select the table that OnigEncAsciiToLowerCaseTable points at.
+ *
+ * A null `table` asks for the built-in table.  When the build defines
+ * USE_APPLICATION_TO_LOWER_CASE_TABLE there is no built-in table, so a
+ * null argument leaves the current setting untouched.  The global is
+ * only written when the new value differs from the current one. */
+extern void
+onigenc_set_default_caseconv_table(const UChar* table)
+{
+  if (table == (const UChar* )0) {
+#ifdef USE_APPLICATION_TO_LOWER_CASE_TABLE
+    return ;
+#else
+    table = BuiltInAsciiToLowerCaseTable;
+#endif
+  }
+
+  if (OnigEncAsciiToLowerCaseTable == table)
+    return ;
+
+  OnigEncAsciiToLowerCaseTable = table;
+}
+
+/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD() macro: returns
+ * whatever that macro yields for (enc, start, s). */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
+
+  return head;
+}
+
+/* Case-pair table for ASCII letters: 52 two-value entries.  The first
+ * 26 pair each upper-case code 0x41-0x5a ('A'-'Z') with its lower-case
+ * counterpart 0x61-0x7a; the second 26 list the same pairs in the
+ * opposite direction.  Handed out by
+ * onigenc_ascii_get_all_pair_ambig_codes() and
+ * onigenc_iso_8859_1_get_all_pair_ambig_codes(). */
+const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+ { 0x41, 0x61 },
+ { 0x42, 0x62 },
+ { 0x43, 0x63 },
+ { 0x44, 0x64 },
+ { 0x45, 0x65 },
+ { 0x46, 0x66 },
+ { 0x47, 0x67 },
+ { 0x48, 0x68 },
+ { 0x49, 0x69 },
+ { 0x4a, 0x6a },
+ { 0x4b, 0x6b },
+ { 0x4c, 0x6c },
+ { 0x4d, 0x6d },
+ { 0x4e, 0x6e },
+ { 0x4f, 0x6f },
+ { 0x50, 0x70 },
+ { 0x51, 0x71 },
+ { 0x52, 0x72 },
+ { 0x53, 0x73 },
+ { 0x54, 0x74 },
+ { 0x55, 0x75 },
+ { 0x56, 0x76 },
+ { 0x57, 0x77 },
+ { 0x58, 0x78 },
+ { 0x59, 0x79 },
+ { 0x5a, 0x7a },
+
+ { 0x61, 0x41 },
+ { 0x62, 0x42 },
+ { 0x63, 0x43 },
+ { 0x64, 0x44 },
+ { 0x65, 0x45 },
+ { 0x66, 0x46 },
+ { 0x67, 0x47 },
+ { 0x68, 0x48 },
+ { 0x69, 0x49 },
+ { 0x6a, 0x4a },
+ { 0x6b, 0x4b },
+ { 0x6c, 0x4c },
+ { 0x6d, 0x4d },
+ { 0x6e, 0x4e },
+ { 0x6f, 0x4f },
+ { 0x70, 0x50 },
+ { 0x71, 0x51 },
+ { 0x72, 0x52 },
+ { 0x73, 0x53 },
+ { 0x74, 0x54 },
+ { 0x75, 0x55 },
+ { 0x76, 0x56 },
+ { 0x77, 0x57 },
+ { 0x78, 0x58 },
+ { 0x79, 0x59 },
+ { 0x7a, 0x5a }
+};
+
+/* Expose the ASCII case-pair table.
+ *
+ * When `flag` equals ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE exactly, *ccs is
+ * pointed at OnigAsciiPairAmbigCodes and the number of entries is
+ * returned.  For any other flag value *ccs is left untouched and the
+ * result is 0. */
+extern int
+onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
+    const OnigPairAmbigCodes** ccs)
+{
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE)
+    return 0;
+
+  *ccs = OnigAsciiPairAmbigCodes;
+  return (int )(sizeof(OnigAsciiPairAmbigCodes)
+                / sizeof(OnigAsciiPairAmbigCodes[0]));
+}
+
+/* Stub that reports zero entries for every flag; neither argument is
+ * examined and *ccs is never written. */
+extern int
+onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
+    const OnigCompAmbigCodes** ccs)
+{
+  (void )flag;
+  (void )ccs;
+
+  return 0;
+}
+
+/* Expose the case-pair tables usable with ISO-8859-1.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+ *       *ccs = OnigAsciiPairAmbigCodes, returns its entry count.
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
+ *       *ccs = the local table below, returns its entry count.
+ *   anything else
+ *       *ccs is left untouched, returns 0.
+ *
+ * The local table pairs 0xc0-0xde with 0xe0-0xfe and then lists the
+ * same pairs in the opposite direction; 0xd7/0xf7 have no entry. */
+extern int
+onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
+    const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes latin1_pairs[] = {
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return (int )(sizeof(OnigAsciiPairAmbigCodes)
+                  / sizeof(OnigAsciiPairAmbigCodes[0]));
+  }
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = latin1_pairs;
+    return (int )(sizeof(latin1_pairs) / sizeof(latin1_pairs[0]));
+  }
+
+  return 0;
+}
+
+/* Expose the single compound case-folding entry for code 0xdf, whose
+ * two alternatives are the byte sequences 0x53 0x53 and 0x73 0x73.
+ *
+ * Only ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (exact match) selects the
+ * table: *ccs is set and the entry count returned.  For any other flag
+ * *ccs is left untouched and 0 is returned. */
+extern int
+onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
+    const OnigCompAmbigCodes** ccs)
+{
+  static const OnigCompAmbigCodes sharp_s_folds[] = {
+    { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
+  };
+
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = sharp_s_folds;
+  return (int )(sizeof(sharp_s_folds) / sizeof(sharp_s_folds[0]));
+}
+
+/* Stub for encodings without ctype code-range tables: ignores all
+ * arguments, writes nothing through `sbr`/`mbr`, and always returns
+ * ONIG_NO_SUPPORT_CONFIG. */
+extern int
+onigenc_not_support_get_ctype_code_range(int ctype,
+    const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+{
+  (void )ctype;
+  (void )sbr;
+  (void )mbr;
+
+  return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Return 1 when `p` lies before `end` and the byte at `p` is 0x0a,
+ * otherwise 0.  Nothing is read when p >= end. */
+extern int
+onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
+{
+  return (p < end && *p == 0x0a) ? 1 : 0;
+}
+
+/* for single byte encodings */
+/* Normalise one single-byte character.
+ *
+ * Stores into *lower either the byte at **p passed through
+ * ONIGENC_ASCII_CODE_TO_LOWER_CASE() (when `flag` contains
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) or the byte unchanged, then
+ * advances *p by one.  `end` is not consulted.  Returns 1, the number
+ * of bytes written to `lower`. */
+extern int
+onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
+    UChar* lower)
+{
+  const int fold_ascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+
+  *lower = fold_ascii ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p) : **p;
+
+  (*p)++;
+  return 1;
+}
+
+/* Consume one byte from *pp and report whether it is case-ambiguous.
+ *
+ * *pp is always advanced by one.  The result is
+ * ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of the consumed byte when `flag`
+ * contains ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, otherwise FALSE.
+ * `end` is not consulted. */
+extern int
+onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
+    const UChar** pp, const UChar* end)
+{
+  const UChar* cur = (*pp)++;
+
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
+}
+
+/* Character length for single-byte encodings: always 1, whatever `p`
+ * points at (the pointer is not dereferenced). */
+extern int
+onigenc_single_byte_mbc_enc_len(const UChar* p)
+{
+  (void )p;
+
+  return 1;
+}
+
+/* Decode a single-byte character: the code point is simply the byte at
+ * `p`.  `end` is not consulted. */
+extern OnigCodePoint
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
+{
+  OnigCodePoint code = (OnigCodePoint )p[0];
+
+  return code;
+}
+
+/* Encoded length of a code point in a single-byte encoding: always 1,
+ * regardless of `code`. */
+extern int
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
+{
+  (void )code;
+
+  return 1;
+}
+
+/* First (and only) encoded byte of `code`: its low eight bits. */
+extern int
+onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
+{
+  return (int )(code & 0xff);
+}
+
+/* Encode `code` as one byte: its low eight bits are stored at buf[0].
+ * Returns 1, the number of bytes written. */
+extern int
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )(code & 0xff);
+
+  return 1;
+}
+
+/* Left-adjust for single-byte encodings: `s` is returned unchanged
+ * (with const cast away); `start` is not consulted. */
+extern UChar*
+onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  UChar* head = (UChar* )s;
+
+  return head;
+}
+
+/* Reverse-match predicate that unconditionally answers TRUE; neither
+ * argument is examined. */
+extern int
+onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  (void )s;
+  (void )end;
+
+  return TRUE;
+}
+
+/* Reverse-match predicate that unconditionally answers FALSE; neither
+ * argument is examined. */
+extern int
+onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  (void )s;
+  (void )end;
+
+  return FALSE;
+}
+
+/* Decode the multi-byte character at `p` into a code point by
+ * concatenating its bytes big-endian (first byte most significant).
+ *
+ * enc_len(enc, p) gives the number of bytes to combine.  The first byte
+ * is always read; each further byte is taken only while `p` is still
+ * before `end`, so a character truncated by `end` yields the value
+ * built from the bytes that were available. */
+extern OnigCodePoint
+onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+  const int len = enc_len(enc, p);
+  OnigCodePoint code = (OnigCodePoint )p[0];
+  int idx = 1;
+
+  p++;
+  while (idx < len && p < end) {
+    code = (code << 8) + *p++;
+    idx++;
+  }
+
+  return code;
+}
+
+/* Normalise one character of a multi-byte encoding into `lower`.
+ *
+ * If ONIGENC_IS_MBC_ASCII() holds for the character at *pp, one byte is
+ * written: passed through ONIGENC_ASCII_CODE_TO_LOWER_CASE() when
+ * `flag` contains ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, unchanged
+ * otherwise.  *pp advances by one and 1 is returned.
+ *
+ * Otherwise the enc_len() bytes of the character are copied verbatim
+ * (no case conversion), front to back -- skipped entirely when `lower`
+ * already equals the source pointer.  *pp advances by that length,
+ * which is also the return value.  `end` is not consulted. */
+extern int
+onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
+    const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int len;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    const int fold_ascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+
+    *lower = fold_ascii ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src) : *src;
+    (*pp)++;
+    return 1;
+  }
+
+  len = enc_len(enc, src);
+  if (lower != src) {
+    int i;
+
+    for (i = 0; i < len; i++)
+      lower[i] = src[i];
+  }
+
+  (*pp) += len;
+  return len;
+}
+
+/* Consume one character from *pp and report whether it is
+ * case-ambiguous.
+ *
+ * When ONIGENC_IS_MBC_ASCII() does not hold, *pp advances by enc_len()
+ * and the answer is FALSE.  Otherwise *pp advances by one byte and the
+ * answer is ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of that byte if `flag`
+ * contains ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, FALSE if not.
+ * `end` is not consulted. */
+extern int
+onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+    const UChar** pp, const UChar* end)
+{
+  const UChar* cur = *pp;
+
+  if (!ONIGENC_IS_MBC_ASCII(cur)) {
+    (*pp) += enc_len(enc, cur);
+    return FALSE;
+  }
+
+  (*pp)++;
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
+}
+
+/* Encoded length for encodings of at most two bytes: 2 when bits 8-15
+ * of `code` contain a non-zero byte, otherwise 1. */
+extern int
+onigenc_mb2_code_to_mbclen(OnigCodePoint code)
+{
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+/* Encoded length for encodings of at most four bytes: the position
+ * (4, 3 or 2) of the most significant non-zero byte among bits 8-31 of
+ * `code`, or 1 when all of those bytes are zero. */
+extern int
+onigenc_mb4_code_to_mbclen(OnigCodePoint code)
+{
+  int len;
+
+  /* probe bytes 3, 2, 1 in turn; stop at the first non-zero one */
+  for (len = 4; len > 1; len--) {
+    if (((code >> (8 * (len - 1))) & 0xff) != 0)
+      break;
+  }
+
+  return len;
+}
+
+/* First encoded byte of `code` for encodings of at most two bytes:
+ * bits 8-15 when they are non-zero, otherwise `code` itself converted
+ * to int. */
+extern int
+onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
+{
+  const int lead = (int )((code >> 8) & 0xff);
+
+  return (lead != 0) ? lead : (int )code;
+}
+
+/* First encoded byte of `code` for encodings of at most four bytes:
+ * the most significant non-zero byte among bits 8-31, or `code` itself
+ * converted to int when all of those bytes are zero. */
+extern int
+onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
+{
+  int shift;
+
+  for (shift = 24; shift > 0; shift -= 8) {
+    const int lead = (int )((code >> shift) & 0xff);
+
+    if (lead != 0)
+      return lead;
+  }
+
+  return (int )code;
+}
+
+/* Encode `code` into `buf` for an encoding of at most two bytes.
+ *
+ * The high byte (bits 8-15) is emitted only when non-zero; the low byte
+ * is always emitted.  The bytes written are then checked against
+ * enc_len() of the produced sequence: on mismatch the result is
+ * ONIGENCERR_INVALID_WIDE_CHAR_VALUE (buf has already been written),
+ * otherwise the number of bytes stored. */
+extern int
+onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int written = 0;
+
+  if ((code & 0xff00) != 0)
+    buf[written++] = (UChar )((code >> 8) & 0xff);
+  buf[written++] = (UChar )(code & 0xff);
+
+  if (enc_len(enc, buf) != written)
+    return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+
+  return written;
+}
+
+/* Encode `code` into `buf` for an encoding of at most four bytes.
+ *
+ * Bytes are emitted most significant first.  Leading zero bytes are
+ * dropped, but once one byte has been written every lower byte follows
+ * even if it is zero; the lowest byte is always written.  The result is
+ * then checked against enc_len() of the produced sequence: on mismatch
+ * ONIGENCERR_INVALID_WIDE_CHAR_VALUE is returned (buf has already been
+ * written), otherwise the number of bytes stored. */
+extern int
+onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int written = 0;
+  int shift;
+
+  for (shift = 24; shift > 0; shift -= 8) {
+    const UChar octet = (UChar )((code >> shift) & 0xff);
+
+    if (octet != 0 || written > 0)
+      buf[written++] = octet;
+  }
+  buf[written++] = (UChar )(code & 0xff);
+
+  if (enc_len(enc, buf) != written)
+    return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+
+  return written;
+}
+
+/* Character-type test for two-byte encodings.
+ *
+ * Codes below 128 are answered by ONIGENC_IS_ASCII_CODE_CTYPE().  For
+ * larger codes the answer can be TRUE only when `ctype` includes WORD,
+ * GRAPH or PRINT, and then only if ONIGENC_CODE_TO_MBCLEN() reports a
+ * length greater than one; every other case is FALSE. */
+extern int
+onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
+    unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & (ONIGENC_CTYPE_WORD |
+                ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) == 0)
+    return FALSE;
+
+  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
+}
+
+/* Character-type test for four-byte encodings.
+ *
+ * Codes below 128 are answered by ONIGENC_IS_ASCII_CODE_CTYPE().  For
+ * larger codes the answer can be TRUE only when `ctype` includes WORD,
+ * GRAPH or PRINT, and then only if ONIGENC_CODE_TO_MBCLEN() reports a
+ * length greater than one; every other case is FALSE. */
+extern int
+onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
+    unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if ((ctype & (ONIGENC_CTYPE_WORD |
+                ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) == 0)
+    return FALSE;
+
+  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
+}
+
+/* Compare up to `n` characters of the encoded string [p, end) with the
+ * byte string `sascii`.
+ *
+ * Each character is decoded with ONIGENC_MBC_TO_CODE() and subtracted
+ * from the current `sascii` byte; the first non-zero difference is
+ * returned.  If the encoded string runs out first, the current `sascii`
+ * byte is returned.  Returns 0 when all `n` characters match (or when
+ * n <= 0). */
+extern int
+onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
+    const UChar* sascii /* ascii */, int n)
+{
+  for (; n > 0; n--) {
+    int code, diff;
+
+    if (p >= end)
+      return (int )(*sascii);
+
+    code = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
+    diff = *sascii - code;
+    if (diff != 0)
+      return diff;
+
+    sascii++;
+    p += enc_len(enc, p);
+  }
+
+  return 0;
+}
+
+#else /* ONIG_RUBY_M17N */
+
+/* Character-type test for the ONIG_RUBY_M17N build.
+ *
+ * Dispatches on `ctype` to the matching m17n_* predicate or
+ * ONIGENC_IS_CODE_* macro and returns its result as is.  NEWLINE is
+ * answered locally (code == 0x0a) and ASCII by code < 128.  Unknown
+ * ctype values yield 0. */
+extern int
+onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
+{
+  switch (ctype) {
+  case ONIGENC_CTYPE_NEWLINE:
+    return (code == 0x0a) ? 1 : 0;
+
+  case ONIGENC_CTYPE_ALPHA:
+    return m17n_isalpha(enc, code);
+  case ONIGENC_CTYPE_BLANK:
+    return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
+  case ONIGENC_CTYPE_CNTRL:
+    return m17n_iscntrl(enc, code);
+  case ONIGENC_CTYPE_DIGIT:
+    return m17n_isdigit(enc, code);
+  case ONIGENC_CTYPE_GRAPH:
+    return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
+  case ONIGENC_CTYPE_LOWER:
+    return m17n_islower(enc, code);
+  case ONIGENC_CTYPE_PRINT:
+    return m17n_isprint(enc, code);
+  case ONIGENC_CTYPE_PUNCT:
+    return m17n_ispunct(enc, code);
+  case ONIGENC_CTYPE_SPACE:
+    return m17n_isspace(enc, code);
+  case ONIGENC_CTYPE_UPPER:
+    return m17n_isupper(enc, code);
+  case ONIGENC_CTYPE_XDIGIT:
+    return m17n_isxdigit(enc, code);
+  case ONIGENC_CTYPE_WORD:
+    return m17n_iswchar(enc, code);
+  case ONIGENC_CTYPE_ASCII:
+    return (code < 128 ? TRUE : FALSE);
+  case ONIGENC_CTYPE_ALNUM:
+    return m17n_isalnum(enc, code);
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+/* Encode `code` into `buf` (ONIG_RUBY_M17N build).
+ *
+ * The bytes are produced by m17n_mbcput(); the return value is
+ * enc_len() applied to the first byte reported by m17n_firstbyte(). */
+extern int
+onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int lead;
+
+  m17n_mbcput(enc, code, buf);
+  lead = m17n_firstbyte(enc, code);
+
+  return enc_len(enc, lead);
+}
+
+/* Lower-case the character at `p` into `buf` (ONIG_RUBY_M17N build).
+ *
+ * The character is decoded with m17n_codepoint() over the enc_len()
+ * bytes starting at `p`, converted with m17n_tolower(), and written
+ * with m17n_mbcput().  Returns m17n_codelen() of the converted code. */
+extern int
+onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+{
+  unsigned int code, lowered;
+
+  code = m17n_codepoint(enc, p, p + enc_len(enc, *p));
+  lowered = m17n_tolower(enc, code);
+
+  m17n_mbcput(enc, lowered, buf);
+  return m17n_codelen(enc, lowered);
+}
+
+/* Consume one character from *pp and report whether it is
+ * case-ambiguous (ONIG_RUBY_M17N build).
+ *
+ * *pp always advances by enc_len() of the first byte and the character
+ * is decoded with m17n_codepoint().  The answer is TRUE only when
+ * `flag` contains ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE and the code is
+ * upper or lower case per m17n_isupper()/m17n_islower(). */
+extern int
+onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+    UChar** pp, UChar* end)
+{
+  UChar* cur = *pp;
+  const int len = enc_len(enc, *cur);
+  unsigned int code;
+
+  (*pp) += len;
+  code = m17n_codepoint(enc, cur, cur + len);
+
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+    return FALSE;
+
+  return (m17n_isupper(enc, code) || m17n_islower(enc, code)) ? TRUE : FALSE;
+}
+
+/* Find the head of the character containing `s` (ONIG_RUBY_M17N build).
+ *
+ * Returns `s` unchanged when it is at or before `start`.  Otherwise
+ * scans backwards to the nearest byte accepted by m17n_islead() (or to
+ * `start`), then walks forward character by character while the next
+ * boundary is still before `s`.  If a boundary lands exactly on `s`,
+ * `s` itself is a head and is returned; otherwise the head of the
+ * character that straddles `s` is returned. */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar *head;
+  int len;
+
+  if (s <= start) return s;
+
+  for (head = s; !m17n_islead(enc, *head) && head > start; head--)
+    ;
+
+  for (;;) {
+    len = enc_len(enc, *head);
+    if (head + len >= s)
+      break;
+    head += len;
+  }
+
+  return (head + len == s) ? s : head;
+}
+
+/* Reverse-match predicate for the ONIG_RUBY_M17N build: returns
+ * ONIGENC_IS_SINGLEBYTE(enc); `s` and `end` are not examined. */
+extern int
+onigenc_is_allowed_reverse_match(OnigEncoding enc,
+    const UChar* s, const UChar* end)
+{
+  (void )s;
+  (void )end;
+
+  return ONIGENC_IS_SINGLEBYTE(enc);
+}
+
+/* No-op in the ONIG_RUBY_M17N build: the argument is ignored and no
+ * state is changed. */
+extern void
+onigenc_set_default_caseconv_table(UChar* table)
+{
+  (void )table;
+}
+
+#endif /* ONIG_RUBY_M17N */
diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h
new file mode 100644
index 0000000..58ee3e7
--- /dev/null
+++ b/ext/mbstring/oniguruma/regenc.h
@@ -0,0 +1,147 @@
+#ifndef REGENC_H
+#define REGENC_H
+/**********************************************************************
+ regenc.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef RUBY_PLATFORM
+#include "config.h"
+#endif
+#include "oniguruma.h"
+
+#ifndef NULL
+#define NULL ((void* )0)
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* error codes (all negative) */
+#define ONIGENCERR_MEMORY -5
+#define ONIGENCERR_TYPE_BUG -6
+#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401
+
+/* NULL tests that compare through void*, so any pointer type is accepted. */
+#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
+#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+/* Early-return helpers.  They expand to a bare `if` statement, so a use
+ * must not be followed directly by `else`. */
+#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
+#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+
+
+#ifdef ONIG_RUBY_M17N
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
+
+#else /* ONIG_RUBY_M17N */
+
+#define USE_UNICODE_FULL_RANGE_CTYPE
+/* the following must not be used together with USE_CRNL_AS_LINE_TERMINATOR */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+
+/* for encoding system implementation (internal) */
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
+
+/* methods for single byte encoding */
+ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
+ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
+ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+
+/* methods for multi byte encoding */
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
+ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
+ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+
+ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
+
+/* in enc/unicode.c */
+ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+
+
+/* Case-mapping lookups in the ISO-8859-1 tables; `c` is used as the index. */
+#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
+  OnigEncISO_8859_1_ToLowerCaseTable[c]
+#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
+  OnigEncISO_8859_1_ToUpperCaseTable[c]
+/* Non-zero when the table entry for `code` has any bit of `ctype` set.
+ * `ctype` is parenthesized so that a mask expression such as (A | B) is
+ * applied as a whole instead of binding as ((entry & A) | B). */
+#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
+  ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & (ctype)) != 0)
+
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
+
+#endif /* is not ONIG_RUBY_M17N */
+
+ONIG_EXTERN int
+onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
+ONIG_EXTERN UChar*
+onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
+
+/* defined in regexec.c, but used in enc/xxx.c */
+extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
+
+/* Case-mapping lookups in the ASCII tables; `c` is used as the index. */
+#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
+#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
+/* Non-zero when the ctype table entry for `code` has any bit of `ctype`
+ * set.  `ctype` is parenthesized so an unparenthesized mask expression
+ * passed by a caller cannot change operator grouping. */
+#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
+  ((OnigEncAsciiCtypeTable[code] & (ctype)) != 0)
+/* Non-zero when `code` is flagged as upper case or lower case. */
+#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
+  ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
+
+#endif /* REGENC_H */
diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c
new file mode 100644
index 0000000..d6ec918
--- /dev/null
+++ b/ext/mbstring/oniguruma/regerror.c
@@ -0,0 +1,371 @@
+/**********************************************************************
+ regerror.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+#include <stdio.h> /* for vsnprintf() */
+
+#ifdef HAVE_STDARG_PROTOTYPES
+#include <stdarg.h>
+#define va_init_list(a,b) va_start(a,b)
+#else
+#include <varargs.h>
+#define va_init_list(a,b) va_start(a)
+#endif
+
+/*
+ * Map an error code to its message format string.
+ *
+ * Returns a null pointer for code >= 0, otherwise a pointer to a string
+ * literal.  Some formats contain "%n", which onig_error_code_to_str()
+ * replaces with a name taken from its OnigErrorInfo argument.  Codes not
+ * listed in the switch yield "undefined error code".
+ */
+extern UChar*
+onig_error_code_to_format(int code)
+{
+ char *p;
+
+ /* non-negative values are not errors and have no message */
+ if (code >= 0) return (UChar* )0;
+
+ switch (code) {
+ case ONIG_MISMATCH:
+ p = "mismatch"; break;
+ case ONIG_NO_SUPPORT_CONFIG:
+ p = "no support in this configuration"; break;
+ case ONIGERR_MEMORY:
+ p = "fail to memory allocation"; break;
+ case ONIGERR_MATCH_STACK_LIMIT_OVER:
+ p = "match-stack limit over"; break;
+ case ONIGERR_TYPE_BUG:
+ p = "undefined type (bug)"; break;
+ case ONIGERR_PARSER_BUG:
+ p = "internal parser error (bug)"; break;
+ case ONIGERR_STACK_BUG:
+ p = "stack error (bug)"; break;
+ case ONIGERR_UNDEFINED_BYTECODE:
+ p = "undefined bytecode (bug)"; break;
+ case ONIGERR_UNEXPECTED_BYTECODE:
+ p = "unexpected bytecode (bug)"; break;
+ case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
+ p = "default multibyte-encoding is not setted"; break;
+ case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
+ p = "can't convert to wide-char on specified multibyte-encoding"; break;
+ case ONIGERR_INVALID_ARGUMENT:
+ p = "invalid argument"; break;
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
+ p = "end pattern at left brace"; break;
+ case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
+ p = "end pattern at left bracket"; break;
+ case ONIGERR_EMPTY_CHAR_CLASS:
+ p = "empty char-class"; break;
+ case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
+ p = "premature end of char-class"; break;
+ case ONIGERR_END_PATTERN_AT_ESCAPE:
+ p = "end pattern at escape"; break;
+ case ONIGERR_END_PATTERN_AT_META:
+ p = "end pattern at meta"; break;
+ case ONIGERR_END_PATTERN_AT_CONTROL:
+ p = "end pattern at control"; break;
+ case ONIGERR_META_CODE_SYNTAX:
+ p = "illegal meta-code syntax"; break;
+ case ONIGERR_CONTROL_CODE_SYNTAX:
+ p = "illegal control-code syntax"; break;
+ case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
+ p = "char-class value at end of range"; break;
+ case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
+ p = "char-class value at start of range"; break;
+ case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
+ p = "unmatched range specifier in char-class"; break;
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
+ p = "target of repeat operator is not specified"; break;
+ case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
+ p = "target of repeat operator is invalid"; break;
+ case ONIGERR_NESTED_REPEAT_OPERATOR:
+ p = "nested repeat operator"; break;
+ case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
+ p = "unmatched close parenthesis"; break;
+ case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
+ p = "end pattern with unmatched parenthesis"; break;
+ case ONIGERR_END_PATTERN_IN_GROUP:
+ p = "end pattern in group"; break;
+ case ONIGERR_UNDEFINED_GROUP_OPTION:
+ p = "undefined group option"; break;
+ case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
+ p = "invalid POSIX bracket type"; break;
+ case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
+ p = "invalid pattern in look-behind"; break;
+ case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
+ p = "invalid repeat range {lower,upper}"; break;
+ case ONIGERR_TOO_BIG_NUMBER:
+ p = "too big number"; break;
+ case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
+ p = "too big number for repeat range"; break;
+ case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
+ p = "upper is smaller than lower in repeat range"; break;
+ case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
+ p = "empty range in char class"; break;
+ case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
+ p = "mismatch multibyte code length in char-class range"; break;
+ case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
+ p = "too many multibyte code ranges are specified"; break;
+ case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
+ p = "too short multibyte code string"; break;
+ case ONIGERR_TOO_BIG_BACKREF_NUMBER:
+ p = "too big backref number"; break;
+ case ONIGERR_INVALID_BACKREF:
+ /* wording depends on whether named groups are compiled in */
+#ifdef USE_NAMED_GROUP
+ p = "invalid backref number/name"; break;
+#else
+ p = "invalid backref number"; break;
+#endif
+ case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
+ p = "numbered backref/call is not allowed. (use name)"; break;
+ case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
+ p = "too big wide-char value"; break;
+ case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
+ p = "too long wide-char value"; break;
+ case ONIGERR_INVALID_WIDE_CHAR_VALUE:
+ p = "invalid wide-char value"; break;
+ case ONIGERR_EMPTY_GROUP_NAME:
+ p = "group name is empty"; break;
+ case ONIGERR_INVALID_GROUP_NAME:
+ p = "invalid group name <%n>"; break;
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ /* wording depends on whether named groups are compiled in */
+#ifdef USE_NAMED_GROUP
+ p = "invalid char in group name <%n>"; break;
+#else
+ p = "invalid char in group number <%n>"; break;
+#endif
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ p = "undefined name <%n> reference"; break;
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ p = "undefined group <%n> reference"; break;
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ p = "multiplex defined name <%n>"; break;
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ p = "multiplex definition name <%n> call"; break;
+ case ONIGERR_NEVER_ENDING_RECURSION:
+ p = "never ending recursion"; break;
+ case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
+ p = "group number is too big for capture history"; break;
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ p = "invalid character property name {%n}"; break;
+ case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
+ p = "not supported encoding combination"; break;
+ case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
+ p = "invalid combination of options"; break;
+ case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
+ p = "over thread pass limit count"; break;
+
+ default:
+ /* any negative code without an entry above */
+ p = "undefined error code"; break;
+ }
+
+ return (UChar* )p;
+}
+
+
+/*
+ * Copy the text in [s, end) into `buf` in a form made of single bytes.
+ *
+ * When the encoding's minimum character length is greater than 1, each
+ * character is decoded: codes below 0x80 are stored as one byte and other
+ * codes are written as a 4-character octal escape ("\ooo") of their low
+ * 8 bits.  Otherwise the bytes are copied unchanged.
+ *
+ * At most `buf_size` bytes are produced (sprintf() may additionally store a
+ * NUL inside that limit).  *is_over is set to 1 when input was left
+ * unconverted and to 0 otherwise.  Returns the number of bytes stored; the
+ * result is not NUL-terminated by contract.
+ */
+static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
+                    UChar buf[], int buf_size, int *is_over)
+{
+  int len;
+  UChar *p;
+  OnigCodePoint code;
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    p = s;
+    len = 0;
+    while (p < end) {
+      code = ONIGENC_MBC_TO_CODE(enc, p, end);
+      if (code >= 0x80) {
+        /* sprintf() writes 4 visible characters plus a terminating NUL,
+           so 5 bytes of room are required ... */
+        if (len + 5 <= buf_size) {
+          sprintf((char* )(&(buf[len])), "\\%03o",
+                  (unsigned int)(code & 0377));
+          /* ... but only the 4 visible characters are kept; counting the
+             NUL would embed it in the message and cut the text short. */
+          len += 4;
+        }
+        else {
+          break;
+        }
+      }
+      else {
+        buf[len++] = (UChar )code;
+      }
+
+      p += enc_len(enc, p);
+      if (len >= buf_size) break;
+    }
+
+    *is_over = ((p < end) ? 1 : 0);
+  }
+  else {
+    len = MIN((end - s), buf_size);
+    xmemcpy(buf, s, (size_t )len);
+    *is_over = ((buf_size < (end - s)) ? 1 : 0);
+  }
+
+  return len;
+}
+
+
+/* for ONIG_MAX_ERROR_MESSAGE_LEN */
+#define MAX_ERROR_PAR_LEN 30
+
+/*
+ * Write the message for `code` into `s` as a NUL-terminated string and
+ * return its length in bytes (terminator excluded).
+ *
+ * For the error codes listed in the first switch arm, one extra argument of
+ * type OnigErrorInfo* is read; the text in [einfo->par, einfo->par_end) is
+ * converted with to_ascii() and substituted for each "%n" in the format,
+ * followed by "..." when it had to be cut.  No size for `s` is passed in,
+ * so the caller must supply a buffer that is large enough.
+ */
+extern int
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_error_code_to_str(UChar* s, int code, ...)
+#else
+onig_error_code_to_str(s, code, va_alist)
+ UChar* s;
+ int code;
+ va_dcl
+#endif
+{
+ UChar *p, *q;
+ OnigErrorInfo* einfo;
+ int len, is_over;
+ UChar parbuf[MAX_ERROR_PAR_LEN];
+ va_list vargs;
+
+ va_init_list(vargs, code);
+
+ switch (code) {
+ /* errors whose format carries a "%n" name placeholder */
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_INVALID_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ einfo = va_arg(vargs, OnigErrorInfo*);
+ /* 3 bytes of parbuf are held back, matching the length of "..." */
+ len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
+ parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
+ q = onig_error_code_to_format(code);
+ p = s;
+ while (*q != '\0') {
+ if (*q == '%') {
+ q++;
+ if (*q == 'n') { /* '%n': name */
+ xmemcpy(p, parbuf, len);
+ p += len;
+ if (is_over != 0) {
+ xmemcpy(p, "...", 3);
+ p += 3;
+ }
+ q++;
+ }
+ else
+ /* '%' followed by anything else: the '%' is dropped and the
+ following character is copied as-is */
+ goto normal_char;
+ }
+ else {
+ normal_char:
+ *p++ = *q++;
+ }
+ }
+ *p = '\0';
+ len = p - s;
+ break;
+
+ default:
+ /* no placeholder: copy the format string verbatim */
+ q = onig_error_code_to_format(code);
+ len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
+ xmemcpy(s, q, len);
+ s[len] = '\0';
+ break;
+ }
+
+ va_end(vargs);
+ return len;
+}
+
+
+/*
+ * Format a message into `buf` with vsnprintf() and, when it fits, append
+ * ": /<pattern>/" built from [pat, pat_end).
+ *
+ * In the appended pattern a '/' is preceded by the escape character,
+ * multibyte characters are copied as-is when the encoding's minimum length
+ * is 1 and as octal escapes otherwise, and bytes that are neither printable
+ * nor space are written as octal escapes.  The pattern is appended only if
+ * the worst case (4 output bytes per pattern byte plus 4) still fits in
+ * `bufsize`; otherwise `buf` is left with the formatted message only.
+ */
+void
+#ifdef HAVE_STDARG_PROTOTYPES
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+                           UChar* pat, UChar* pat_end, const UChar *fmt, ...)
+#else
+onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
+    UChar buf[];
+    int bufsize;
+    OnigEncoding enc;
+    UChar* pat;
+    UChar* pat_end;
+    const UChar *fmt;
+    va_dcl
+#endif
+{
+  int n, need, len;
+  UChar *p, *s, *bp;
+  UChar bs[6];
+  va_list args;
+
+  va_init_list(args, fmt);
+  n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
+  va_end(args);
+
+  need = (pat_end - pat) * 4 + 4;
+
+  /* A negative result means vsnprintf() failed; `buf` may then be
+     unterminated, so it must not be handed to strcat(). */
+  if (n >= 0 && n + need < bufsize) {
+    strcat((char* )buf, ": /");
+    s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
+
+    p = pat;
+    while (p < pat_end) {
+      if (*p == MC_ESC(enc)) {
+        *s++ = *p++;
+        /* Copy the escaped character, but never read past pat_end (a
+           pattern may end with a lone escape or a cut-off character). */
+        if (p < pat_end) {
+          len = enc_len(enc, p);
+          while (len-- > 0 && p < pat_end) *s++ = *p++;
+        }
+      }
+      else if (*p == '/') {
+        *s++ = (unsigned char )MC_ESC(enc);
+        *s++ = *p++;
+      }
+      else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
+        len = enc_len(enc, p);
+        if (ONIGENC_MBC_MINLEN(enc) == 1) {
+          while (len-- > 0 && p < pat_end) *s++ = *p++;
+        }
+        else { /* for UTF16 */
+          int blen;
+
+          while (len-- > 0 && p < pat_end) {
+            sprintf((char* )bs, "\\%03o", *p++ & 0377);
+            blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+            bp = bs;
+            while (blen-- > 0) *s++ = *bp++;
+          }
+        }
+      }
+      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
+               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
+        sprintf((char* )bs, "\\%03o", *p++ & 0377);
+        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+        bp = bs;
+        while (len-- > 0) *s++ = *bp++;
+      }
+      else {
+        *s++ = *p++;
+      }
+    }
+
+    *s++ = '/';
+    *s   = '\0';
+  }
+}
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
new file mode 100644
index 0000000..918aa67
--- /dev/null
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -0,0 +1,3949 @@
+/**********************************************************************
+ regexec.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+/* True when the character at `p` has code 13 and the character right after
+ * it satisfies ONIGENC_IS_MBC_NEWLINE. */
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+ ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+#endif
+
+#ifdef USE_CAPTURE_HISTORY
+static void history_tree_free(OnigCaptureTreeNode* node);
+
+/*
+ * Free every child subtree of `node` and reset the node's own fields to
+ * their "empty" values.  The child pointer array itself is kept and every
+ * allocated slot is nulled.  A null `node` is ignored.
+ */
+static void
+history_tree_clear(OnigCaptureTreeNode* node)
+{
+  int idx;
+
+  if (IS_NULL(node)) return;
+
+  for (idx = 0; idx < node->num_childs; idx++) {
+    OnigCaptureTreeNode* kid = node->childs[idx];
+
+    if (IS_NOT_NULL(kid))
+      history_tree_free(kid);
+  }
+
+  idx = 0;
+  while (idx < node->allocated) {
+    node->childs[idx] = (OnigCaptureTreeNode* )0;
+    idx++;
+  }
+
+  node->num_childs = 0;
+  node->beg        = ONIG_REGION_NOTPOS;
+  node->end        = ONIG_REGION_NOTPOS;
+  node->group      = -1;
+}
+
+/*
+ * Release `node`, all of its descendants and its child pointer array.
+ * A null `node` is ignored.
+ */
+static void
+history_tree_free(OnigCaptureTreeNode* node)
+{
+  if (IS_NULL(node)) return;
+
+  history_tree_clear(node);
+  /* history_tree_clear() only empties the slots; the array allocated by
+     history_tree_add_child() has to be released here or it leaks. */
+  if (IS_NOT_NULL(node->childs)) xfree(node->childs);
+  xfree(node);
+}
+
+/* Free the capture-history tree attached to `r`, if any, and detach it. */
+static void
+history_root_free(OnigRegion* r)
+{
+  if (IS_NULL(r->history_root)) return;
+
+  history_tree_free(r->history_root);
+  r->history_root = (OnigCaptureTreeNode* )0;
+}
+
+/*
+ * Allocate a capture-tree node with no children, group -1 and both
+ * positions set to ONIG_REGION_NOTPOS.  The allocation result is checked
+ * with CHECK_NULL_RETURN before the fields are written.
+ */
+static OnigCaptureTreeNode*
+history_node_new(void)
+{
+  OnigCaptureTreeNode* fresh;
+
+  fresh = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
+  CHECK_NULL_RETURN(fresh);
+
+  fresh->group      = -1;
+  fresh->beg        = ONIG_REGION_NOTPOS;
+  fresh->end        = ONIG_REGION_NOTPOS;
+  fresh->num_childs = 0;
+  fresh->allocated  = 0;
+  fresh->childs     = (OnigCaptureTreeNode** )0;
+
+  return fresh;
+}
+
+/*
+ * Append `child` to `parent`'s child array, growing the array when full
+ * (8 slots at first, doubling afterwards).
+ *
+ * Returns 0 on success or ONIGERR_MEMORY when the array cannot be grown;
+ * in that case `parent` is left exactly as it was and `child` is not
+ * attached.
+ */
+static int
+history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
+{
+#define HISTORY_TREE_INIT_ALLOC_SIZE  8
+
+  if (parent->num_childs >= parent->allocated) {
+    int n, i;
+    OnigCaptureTreeNode** grown;
+
+    if (IS_NULL(parent->childs)) {
+      n = HISTORY_TREE_INIT_ALLOC_SIZE;
+      grown =
+        (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+    }
+    else {
+      n = parent->allocated * 2;
+      grown =
+        (OnigCaptureTreeNode** )xrealloc(parent->childs,
+                                         sizeof(OnigCaptureTreeNode*) * n);
+    }
+    /* Keep the old array reachable on failure: assigning the realloc
+       result straight to parent->childs would lose it. */
+    CHECK_NULL_RETURN_VAL(grown, ONIGERR_MEMORY);
+    parent->childs = grown;
+
+    for (i = parent->allocated; i < n; i++) {
+      parent->childs[i] = (OnigCaptureTreeNode* )0;
+    }
+    parent->allocated = n;
+  }
+
+  parent->childs[parent->num_childs] = child;
+  parent->num_childs++;
+  return 0;
+}
+
+/*
+ * Return a deep copy of the tree rooted at `node`, or a null pointer when
+ * any allocation fails (everything built so far is released).
+ *
+ * Only `beg`, `end` and the children are copied.
+ * NOTE(review): `group` is left at the -1 set by history_node_new(); confirm
+ * whether the clone is meant to carry node->group as well.
+ */
+static OnigCaptureTreeNode*
+history_tree_clone(OnigCaptureTreeNode* node)
+{
+  int i;
+  OnigCaptureTreeNode *clone, *child;
+
+  clone = history_node_new();
+  CHECK_NULL_RETURN(clone);
+
+  clone->beg = node->beg;
+  clone->end = node->end;
+  for (i = 0; i < node->num_childs; i++) {
+    child = history_tree_clone(node->childs[i]);
+    if (IS_NULL(child)) {
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+    /* If the child cannot be attached it is owned by nobody, so free it
+       here instead of returning a silently incomplete clone. */
+    if (history_tree_add_child(clone, child) != 0) {
+      history_tree_free(child);
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+  }
+
+  return clone;
+}
+
+/* Return the capture-history root stored in `region` (may be null). */
+extern OnigCaptureTreeNode*
+onig_get_capture_tree(OnigRegion* region)
+{
+  OnigCaptureTreeNode* root = region->history_root;
+
+  return root;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
+/*
+ * Mark the first num_regs registers of `region` as unset
+ * (ONIG_REGION_NOTPOS) and, when capture history is compiled in, drop the
+ * region's history tree.
+ */
+extern void
+onig_region_clear(OnigRegion* region)
+{
+  int reg = 0;
+
+  while (reg < region->num_regs) {
+    region->end[reg] = ONIG_REGION_NOTPOS;
+    region->beg[reg] = region->end[reg];
+    reg++;
+  }
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(region);
+#endif
+}
+
+/*
+ * Make `region` able to hold `n` registers and set num_regs to `n`.
+ *
+ * The arrays are never made smaller than ONIG_NREGION entries and are only
+ * reallocated when they need to grow.  Returns 0 on success or
+ * ONIGERR_MEMORY; on failure num_regs and allocated are left unchanged and
+ * beg/end remain valid (or unallocated), so the region can still be freed
+ * or cleared safely.
+ */
+extern int
+onig_region_resize(OnigRegion* region, int n)
+{
+  int want = (n < ONIG_NREGION) ? ONIG_NREGION : n;
+
+  if (region->allocated == 0) {
+    int* beg = (int* )xmalloc(want * sizeof(int));
+    int* end = (int* )xmalloc(want * sizeof(int));
+
+    if (beg == 0 || end == 0) {
+      /* do not leak the half that did succeed */
+      if (beg != 0) xfree(beg);
+      if (end != 0) xfree(end);
+      return ONIGERR_MEMORY;
+    }
+
+    region->beg = beg;
+    region->end = end;
+    region->allocated = want;
+  }
+  else if (region->allocated < want) {
+    int* tmp;
+
+    /* Go through a temporary so a failed realloc does not overwrite the
+       still-valid original pointer. */
+    tmp = (int* )xrealloc(region->beg, want * sizeof(int));
+    if (tmp == 0) return ONIGERR_MEMORY;
+    region->beg = tmp;
+
+    tmp = (int* )xrealloc(region->end, want * sizeof(int));
+    if (tmp == 0) return ONIGERR_MEMORY;
+    region->end = tmp;
+
+    region->allocated = want;
+  }
+
+  region->num_regs = n;
+  return 0;
+}
+
+/*
+ * Resize `region` to `n` registers and then clear it.  Returns the non-zero
+ * result of onig_region_resize() if resizing fails (the region is then not
+ * cleared), otherwise 0.
+ */
+extern int
+onig_region_resize_clear(OnigRegion* region, int n)
+{
+  int status = onig_region_resize(region, n);
+
+  if (status == 0)
+    onig_region_clear(region);
+
+  return status;
+}
+
+/*
+ * Store the pair (beg, end) in register `at`, growing the region first when
+ * `at` is beyond the allocated size.  Returns ONIGERR_INVALID_ARGUMENT for
+ * a negative `at`, a negative resize error, or 0 on success.
+ */
+extern int
+onig_region_set(OnigRegion* region, int at, int beg, int end)
+{
+  int status;
+
+  if (at < 0) return ONIGERR_INVALID_ARGUMENT;
+
+  if (region->allocated <= at) {
+    status = onig_region_resize(region, at + 1);
+    if (status < 0) return status;
+  }
+
+  region->end[at] = end;
+  region->beg[at] = beg;
+  return 0;
+}
+
+/* Put `region` into the empty state: no registers, no arrays, no history. */
+extern void
+onig_region_init(OnigRegion* region)
+{
+  region->beg          = (int* )0;
+  region->end          = (int* )0;
+  region->allocated    = 0;
+  region->num_regs     = 0;
+  region->history_root = (OnigCaptureTreeNode* )0;
+}
+
+/*
+ * Allocate and initialize an empty region.  Returns a null pointer when the
+ * allocation fails.
+ */
+extern OnigRegion*
+onig_region_new(void)
+{
+  OnigRegion* r;
+
+  r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+  /* onig_region_init() writes through the pointer, so bail out first. */
+  if (r == (OnigRegion* )0) return r;
+
+  onig_region_init(r);
+  return r;
+}
+
+/*
+ * Release the register arrays (and the history tree, when compiled in) of
+ * `r`; additionally free `r` itself when `free_self` is non-zero.  A null
+ * `r` is ignored.
+ */
+extern void
+onig_region_free(OnigRegion* r, int free_self)
+{
+  if (!r) return;
+
+  if (r->allocated > 0) {
+    if (r->beg) xfree(r->beg);
+    if (r->end) xfree(r->end);
+    r->allocated = 0;
+  }
+
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(r);
+#endif
+
+  if (free_self) xfree(r);
+}
+
+/*
+ * Copy the registers (and, when compiled in, the capture-history tree) of
+ * `from` into `to`, growing `to`'s arrays when needed.
+ *
+ * The function cannot report errors; if memory for the arrays cannot be
+ * obtained it returns early and leaves `to` unchanged instead of writing
+ * through a null pointer.
+ */
+extern void
+onig_region_copy(OnigRegion* to, OnigRegion* from)
+{
+#define RREGC_SIZE   (sizeof(int) * from->num_regs)
+  int i;
+
+  if (to == from) return;
+
+  if (to->allocated == 0) {
+    if (from->num_regs > 0) {
+      int* beg = (int* )xmalloc(RREGC_SIZE);
+      int* end = (int* )xmalloc(RREGC_SIZE);
+
+      if (beg == 0 || end == 0) {
+        if (beg != 0) xfree(beg);
+        if (end != 0) xfree(end);
+        return;
+      }
+      to->beg = beg;
+      to->end = end;
+      to->allocated = from->num_regs;
+    }
+  }
+  else if (to->allocated < from->num_regs) {
+    int* tmp;
+
+    /* A temporary keeps the old array reachable if realloc fails. */
+    tmp = (int* )xrealloc(to->beg, RREGC_SIZE);
+    if (tmp == 0) return;
+    to->beg = tmp;
+
+    tmp = (int* )xrealloc(to->end, RREGC_SIZE);
+    if (tmp == 0) return;
+    to->end = tmp;
+
+    to->allocated = from->num_regs;
+  }
+
+  for (i = 0; i < from->num_regs; i++) {
+    to->beg[i] = from->beg[i];
+    to->end[i] = from->end[i];
+  }
+  to->num_regs = from->num_regs;
+
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(to);
+
+  if (IS_NOT_NULL(from->history_root)) {
+    to->history_root = history_tree_clone(from->history_root);
+  }
+#endif
+}
+
+
+/** stack **/
+#define INVALID_STACK_INDEX -1
+typedef long StackIndex;
+
+/*
+ * One entry of the matcher's backtrack stack.  `type` holds one of the
+ * STK_* constants and selects which member of the union `u` is in use.
+ */
+typedef struct _StackType {
+ unsigned int type;
+ union {
+ struct {
+ UChar *pcode; /* byte code position */
+ UChar *pstr; /* string position */
+ UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
+ } state;
+ struct {
+ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+ UChar *pcode; /* byte code position (head of repeated target) */
+ int num; /* repeat id */
+ } repeat;
+ struct {
+ StackIndex si; /* index of stack */
+ } repeat_inc;
+ struct {
+ int num; /* memory num */
+ UChar *pstr; /* start/end position */
+ /* The following information is set if this stack type is MEM-START */
+ StackIndex start; /* prev. info (for backtrack "(...)*" ) */
+ StackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ } mem;
+ struct {
+ int num; /* null check id */
+ UChar *pstr; /* start position */
+ } null_check;
+#ifdef USE_SUBEXP_CALL
+ struct {
+ UChar *ret_addr; /* byte code position */
+ int num; /* null check id */
+ UChar *pstr; /* string position */
+ } call_frame;
+#endif
+ } u;
+} StackType;
+
+/* stack type */
+/* Values for StackType.type.  The three groups below differ in which bits
+ * they occupy; the masks at the end test those bits. */
+/* used by normal-POP */
+/* (these are the only types with bits inside STK_MASK_POP_USED) */
+#define STK_ALT 0x0001
+#define STK_LOOK_BEHIND_NOT 0x0002
+#define STK_POS_NOT 0x0003
+/* handled by normal-POP */
+#define STK_MEM_START 0x0100
+#define STK_MEM_END 0x8200
+#define STK_REPEAT_INC 0x0300
+#define STK_STATE_CHECK_MARK 0x1000
+/* avoided by normal-POP */
+#define STK_NULL_CHECK_START 0x3000
+#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_MEM_END_MARK 0x8400
+#define STK_POS 0x0500 /* used when POP-POS */
+#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0700
+#define STK_CALL_FRAME 0x0800
+#define STK_RETURN 0x0900
+#define STK_VOID 0x0a00 /* for fill a blank */
+
+/* stack type check mask */
+#define STK_MASK_POP_USED 0x00ff
+/* NOTE(review): 0x10ff also overlaps bit 0x1000 of STK_NULL_CHECK_START and
+ * STK_NULL_CHECK_END, not only STK_STATE_CHECK_MARK - confirm intended. */
+#define STK_MASK_TO_VOID_TARGET 0x10ff
+/* 0x8000 is the bit shared by STK_MEM_END and STK_MEM_END_MARK only */
+#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
+
+/*
+ * Per-call matcher state.  stack_p/stack_n record a heap-allocated
+ * backtrack stack (written by STACK_SAVE, reused by STACK_INIT and released
+ * by MATCH_ARG_FREE); the remaining fields are filled by MATCH_ARG_INIT and
+ * STATE_CHECK_BUFF_INIT.
+ */
+typedef struct {
+ void* stack_p;
+ int stack_n;
+ OnigOptionType options;
+ OnigRegion* region;
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ int best_len; /* for ONIG_OPTION_FIND_LONGEST */
+ UChar* best_s;
+#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ void* state_check_buff;
+ int state_check_buff_size;
+#endif
+} MatchArg;
+
+/*
+ * Initialize a MatchArg: no saved stack yet, plus the given options, region
+ * and start position.  With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE, best_len
+ * additionally starts at ONIG_MISMATCH.  stack_n and the state-check fields
+ * are not touched here.
+ */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).best_len = ONIG_MISMATCH;\
+} while (0)
+#else
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+} while (0)
+#endif
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+/* State-check buffers of at least this many bytes come from xmalloc();
+   smaller ones come from xalloca() and must never be passed to xfree(). */
+#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16
+
+/*
+ * Set up (msa).state_check_buff as a bit buffer of
+ * ((str_len + 1) * state_num + 7) / 8 bytes, zeroed from the scaled
+ * `offset` onward.  `offset` must be an lvalue: it is overwritten with
+ * (offset * state_num) / 8.  The buffer is left null with size 0 when the
+ * check does not apply, the size is out of range, or allocation fails.
+ */
+#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\
+  (msa).state_check_buff = (void* )0;\
+  (msa).state_check_buff_size = 0;\
+  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
+    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
+    offset = ((offset) * (state_num)) >> 3;\
+    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
+      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
+        (msa).state_check_buff = (void* )xmalloc(size);\
+      else \
+        (msa).state_check_buff = (void* )xalloca(size);\
+      if ((msa).state_check_buff != (void* )0) {\
+        xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
+                (size_t )(size - (offset))); \
+        (msa).state_check_buff_size = size;\
+      }\
+    }\
+  }\
+} while (0)
+
+/*
+ * Release what a MatchArg owns: the saved stack and, only when it was
+ * obtained from xmalloc() (size at or above the threshold), the state-check
+ * buffer.  No trailing semicolon, so `MATCH_ARG_FREE(x);` is exactly one
+ * statement and is safe in an unbraced if/else.
+ */
+#define MATCH_ARG_FREE(msa) do {\
+  if ((msa).stack_p) xfree((msa).stack_p);\
+  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
+    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
+  }\
+} while (0)
+#else
+/* Without the combination-explosion check there is no buffer to set up. */
+#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
+/* Wrapped in do/while so the macro cannot capture a following `else`. */
+#define MATCH_ARG_FREE(msa) do {\
+  if ((msa).stack_p) xfree((msa).stack_p);\
+} while (0)
+#endif
+
+
+
+/*
+ * Set up the backtrack stack variables (stk_alloc, stk_base, stk, stk_end)
+ * of the enclosing function, which must also have `msa` in scope.
+ * `alloc_addr` receives an xalloca() area for `ptr_num` pointers.  When
+ * msa->stack_p holds a previously saved stack it is reused with
+ * msa->stack_n entries; otherwise `stack_num` entries are placed in the
+ * same xalloca() area, right after the pointers.
+ */
+#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
+ if (msa->stack_p) {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
+ stk_alloc = (StackType* )(msa->stack_p);\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + msa->stack_n;\
+ }\
+ else {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
+ + sizeof(StackType) * (stack_num));\
+ stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + (stack_num);\
+ }\
+} while(0)
+
+/*
+ * Record the current stack in msa (pointer and entry count) when stk_base
+ * no longer equals stk_alloc, i.e. after the stack has been moved by
+ * stack_double().
+ */
+#define STACK_SAVE do{\
+ if (stk_base != stk_alloc) {\
+ msa->stack_p = stk_base;\
+ msa->stack_n = stk_end - stk_base;\
+ };\
+} while(0)
+
+/* Upper bound on backtrack stack entries enforced by stack_double();
+   0 disables the limit. */
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+/* Return the current match-stack limit. */
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+  unsigned int limit = MatchStackLimitSize;
+
+  return limit;
+}
+
+/* Replace the match-stack limit with `size`; always returns 0. */
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+  const int ok = 0;
+
+  MatchStackLimitSize = size;
+  return ok;
+}
+
+/*
+ * Double the capacity of the backtrack stack described by *arg_stk_base,
+ * *arg_stk_end and *arg_stk, updating all three on success.
+ *
+ * If the stack is still the initial one (stk_base == stk_alloc with no
+ * stack saved in msa) a new block is obtained with xmalloc() and the
+ * entries are copied; otherwise the block is grown with xrealloc(), capped
+ * at MatchStackLimitSize when that limit is non-zero.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY when allocation fails, or
+ * ONIGERR_MATCH_STACK_LIMIT_OVER when the stack is already at the limit.
+ */
+static int
+stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
+ StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
+{
+ unsigned int n;
+ StackType *x, *stk_base, *stk_end, *stk;
+
+ /* local copies carry the names that the STACK_SAVE macro expects */
+ stk_base = *arg_stk_base;
+ stk_end = *arg_stk_end;
+ stk = *arg_stk;
+
+ n = stk_end - stk_base;
+ if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
+ /* first growth: the limit is not consulted on this path */
+ x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
+ if (IS_NULL(x)) {
+ STACK_SAVE;
+ return ONIGERR_MEMORY;
+ }
+ xmemcpy(x, stk_base, n * sizeof(StackType));
+ n *= 2;
+ }
+ else {
+ n *= 2;
+ if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
+ /* already exactly at the limit: give up; otherwise grow up to it */
+ if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = MatchStackLimitSize;
+ }
+ x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
+ if (IS_NULL(x)) {
+ STACK_SAVE;
+ return ONIGERR_MEMORY;
+ }
+ }
+ /* re-express the stack top relative to the new block */
+ *arg_stk = x + (stk - stk_base);
+ *arg_stk_base = x;
+ *arg_stk_end = x + n;
+ return 0;
+}
+/*
+ * STACK_ENSURE(n): make sure at least n free entries remain above stk.
+ * When fewer remain, stack_double() is called; on failure the stack is
+ * saved with STACK_SAVE and the enclosing function returns the error code.
+ */
+#define STACK_ENSURE(n) do {\
+ if (stk_end - stk < (n)) {\
+ int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
+ if (r != 0) { STACK_SAVE; return r; } \
+ }\
+} while(0)
+/* Convert between stack entry pointers and indices relative to stk_base. */
+#define STACK_AT(index) (stk_base + (index))
+#define GET_STACK_INDEX(stk) ((stk) - stk_base)
+/* Push an entry that carries only a type tag. */
+#define STACK_PUSH_TYPE(stack_type) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ STACK_INC;\
+} while(0)
+/* True when the entry's type has any STK_MASK_TO_VOID_TARGET bit set. */
+#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
+/*
+ * State-check support and the generic push macros.
+ *
+ * With USE_COMBINATION_EXPLOSION_CHECK, state_check_buff is used as a bit
+ * array.  STATE_CHECK_POS gives the bit index for string position s and
+ * state number snum: (s - str) * num_comb_exp_check + (snum - 1).
+ * STATE_CHECK_VAL stores that bit (masked, not normalised to 0/1) in v, or
+ * 0 when there is no buffer; note that it reads `s` from the expanding
+ * scope, not from a macro parameter.
+ */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define STATE_CHECK_POS(s,snum) \
+ (((s) - str) * num_comb_exp_check + ((snum) - 1))
+#define STATE_CHECK_VAL(v,snum) do {\
+ if (state_check_buff != NULL) {\
+ int x = STATE_CHECK_POS(s,snum);\
+ (v) = state_check_buff[x/8] & (1<<(x%8));\
+ }\
+ else (v) = 0;\
+} while(0)
+
+/* else-if arm for the pop loops: sets the bit recorded by a STK_STATE_CHECK_MARK entry. */
+#define ELSE_IF_STATE_CHECK_MARK(stk) \
+ else if ((stk)->type == STK_STATE_CHECK_MARK) { \
+ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
+ state_check_buff[x/8] |= (1<<(x%8)); \
+ }
+/* Push a state entry (pattern address, string position, previous position); state_check is 0. */
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = 0;\
+ STACK_INC;\
+} while(0)
+/* Like STACK_PUSH but without STACK_ENSURE and without setting pstr / pstr_prev. */
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.state_check = 0;\
+ STACK_INC;\
+} while(0)
+/* Push a STK_ALT entry tagged with snum (or 0 when there is no state-check buffer). */
+#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_ALT;\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+ STACK_INC;\
+} while(0)
+/* Push a STK_STATE_CHECK_MARK entry for (s, snum); does nothing without a buffer. */
+#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+ if (state_check_buff != NULL) {\
+ STACK_ENSURE(1);\
+ stk->type = STK_STATE_CHECK_MARK;\
+ stk->u.state.pstr = (s);\
+ stk->u.state.state_check = (snum);\
+ STACK_INC;\
+ }\
+} while(0)
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+/* Without state checking the extra pop-loop arm expands to nothing. */
+#define ELSE_IF_STATE_CHECK_MARK(stk)
+/* Same push macros as above, minus the state_check field. */
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ STACK_INC;\
+} while(0)
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+/* Convenience wrappers that push a specific entry type via STACK_PUSH / STACK_PUSH_TYPE. */
+#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
+#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
+#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
+#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
+#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
+ STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
+/* Push a STK_REPEAT entry for repeat `id` at pattern address `pat`, count starting at 0. */
+#define STACK_PUSH_REPEAT(id, pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT;\
+ stk->u.repeat.num = (id);\
+ stk->u.repeat.pcode = (pat);\
+ stk->u.repeat.count = 0;\
+ STACK_INC;\
+} while(0)
+/* Push a STK_REPEAT_INC entry holding the stack index `sindex`. */
+#define STACK_PUSH_REPEAT_INC(sindex) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT_INC;\
+ stk->u.repeat_inc.si = (sindex);\
+ STACK_INC;\
+} while(0)
+/*
+ * Push a STK_MEM_START entry for group mnum at string position s.
+ * The previous mem_start_stk / mem_end_stk values are saved in the entry;
+ * mem_start_stk[mnum] then becomes this entry's index and
+ * mem_end_stk[mnum] is reset to INVALID_STACK_INDEX.
+ */
+#define STACK_PUSH_MEM_START(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_START;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ STACK_INC;\
+} while(0)
+/*
+ * Push a STK_MEM_END entry for group mnum at string position s, saving
+ * the previous start/end indices and making mem_end_stk[mnum] this entry.
+ */
+#define STACK_PUSH_MEM_END(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
+ STACK_INC;\
+} while(0)
+/* Push a STK_MEM_END_MARK entry that records only the group number. */
+#define STACK_PUSH_MEM_END_MARK(mnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END_MARK;\
+ stk->u.mem.num = (mnum);\
+ STACK_INC;\
+} while(0)
+/*
+ * STACK_GET_MEM_START(mnum, k): scan downward from stk for the
+ * STK_MEM_START entry of group mnum that is not closed by a later
+ * end/end-mark entry of the same group (tracked with `level`).
+ * k is left on that entry, or on stk_base if the scan runs out.
+ */
+#define STACK_GET_MEM_START(mnum, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (k > stk_base) {\
+ k--;\
+ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
+ && k->u.mem.num == (mnum)) {\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) break;\
+ level--;\
+ }\
+ }\
+} while (0)
+/*
+ * STACK_GET_MEM_RANGE(k, mnum, start, end): scan upward from k (which is
+ * advanced) to stk.  `start` gets the string position of the outermost
+ * STK_MEM_START of group mnum seen, `end` the position of the STK_MEM_END
+ * that brings the nesting level back to 0.
+ */
+#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
+ int level = 0;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) (start) = k->u.mem.pstr;\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
+ level--;\
+ if (level == 0) {\
+ (end) = k->u.mem.pstr;\
+ break;\
+ }\
+ }\
+ k++;\
+ }\
+} while (0)
+/* Push a STK_NULL_CHECK_START entry for check `cnum`, remembering string position s. */
+#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_START;\
+ stk->u.null_check.num = (cnum);\
+ stk->u.null_check.pstr = (s);\
+ STACK_INC;\
+} while(0)
+/* Push a STK_NULL_CHECK_END entry for check `cnum`. */
+#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_END;\
+ stk->u.null_check.num = (cnum);\
+ STACK_INC;\
+} while(0)
+/* Push a STK_CALL_FRAME entry whose return address is `pat`. */
+#define STACK_PUSH_CALL_FRAME(pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALL_FRAME;\
+ stk->u.call_frame.ret_addr = (pat);\
+ STACK_INC;\
+} while(0)
+/* Push a STK_RETURN entry (type tag only). */
+#define STACK_PUSH_RETURN do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_RETURN;\
+ STACK_INC;\
+} while(0)
+
+/*
+ * STACK_BASE_CHECK(p, at): debug-only underflow guard.  With ONIG_DEBUG
+ * it reports the call site name `at` on stderr and jumps to stack_error
+ * when p has dropped below stk_base; otherwise it expands to nothing.
+ */
+#ifdef ONIG_DEBUG
+#define STACK_BASE_CHECK(p, at) \
+ if ((p) < stk_base) {\
+ fprintf(stderr, "at %s\n", at);\
+ goto stack_error;\
+ }
+#else
+#define STACK_BASE_CHECK(p, at)
+#endif
+/* Discard exactly one entry from the top of the stack. */
+#define STACK_POP_ONE do {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
+} while(0)
+/*
+ * STACK_POP: discard entries until one whose type has a STK_MASK_POP_USED
+ * bit is on top (stk is left pointing at it).  pop_level selects how much
+ * state is undone for the entries skipped on the way:
+ *   STACK_POP_LEVEL_FREE      - only state-check marks are processed;
+ *   STACK_POP_LEVEL_MEM_START - also restores mem_start_stk / mem_end_stk
+ *                               from STK_MEM_START entries;
+ *   otherwise                 - additionally decrements the repeat count
+ *                               referenced by STK_REPEAT_INC entries and
+ *                               restores from STK_MEM_END entries.
+ */
+#define STACK_POP do {\
+ switch (pop_level) {\
+ case STACK_POP_LEVEL_FREE:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ case STACK_POP_LEVEL_MEM_START:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP 2"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ default:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP 3"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ }\
+} while(0)
+/*
+ * Pop down to the nearest STK_POS_NOT entry (stk ends up pointing at it),
+ * undoing capture indices and repeat counts recorded by the entries
+ * skipped, regardless of pop_level.
+ */
+#define STACK_POP_TIL_POS_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
+ if (stk->type == STK_POS_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+} while(0)
+/* Same as STACK_POP_TIL_POS_NOT, but stops at the nearest STK_LOOK_BEHIND_NOT entry. */
+#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
+ if (stk->type == STK_LOOK_BEHIND_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+} while(0)
+/*
+ * STACK_POS_END(k): walk down from stk, turning every to-void-target
+ * entry into STK_VOID, up to and including the nearest STK_POS entry.
+ * stk itself is not moved; k is left on the former STK_POS entry.
+ */
+#define STACK_POS_END(k) do {\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_POS_END"); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_POS) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+/* Same walk as STACK_POS_END with a private cursor, ending at the nearest STK_STOP_BT entry. */
+#define STACK_STOP_BT_END do {\
+ StackType *k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_STOP_BT) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+/*
+ * STACK_NULL_CHECK(isnull, id, s): find the nearest STK_NULL_CHECK_START
+ * entry for `id` below stk and set isnull to whether its recorded string
+ * position equals s.
+ */
+#define STACK_NULL_CHECK(isnull,id,s) do {\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_NULL_CHECK_REC(isnull, id, s): nesting-aware variant of
+ * STACK_NULL_CHECK.  Walks down from stk counting
+ * STK_NULL_CHECK_END / STK_NULL_CHECK_START pairs that belong to `id`;
+ * the START entry reached at nesting level 0 decides the result
+ * (isnull = its recorded position equals s).
+ *
+ * Only END entries for the same id raise the level, matching the START
+ * side (which only lowers it for the same id) and the bookkeeping in
+ * STACK_NULL_CHECK_MEMST_REC; counting END entries of other checks would
+ * leave the level unbalanced and skip the START entry being sought.
+ */
+#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
+  int level = 0;\
+  StackType* k = stk;\
+  while (1) {\
+    k--;\
+    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
+    if (k->type == STK_NULL_CHECK_START) {\
+      if (k->u.null_check.num == (id)) {\
+        if (level == 0) {\
+          (isnull) = (k->u.null_check.pstr == (s));\
+          break;\
+        }\
+        else level--;\
+      }\
+    }\
+    else if (k->type == STK_NULL_CHECK_END) {\
+      if (k->u.null_check.num == (id)) level++;\
+    }\
+  }\
+} while(0)
+/*
+ * STACK_NULL_CHECK_MEMST(isnull, id, s, reg): null check that also looks
+ * at capture state.  Finds the nearest STK_NULL_CHECK_START for `id`:
+ *   - recorded position differs from s            -> isnull = 0;
+ *   - otherwise isnull starts at 1 and every STK_MEM_START entry between
+ *     that point and stk is inspected: an unset end index, or start and
+ *     end positions that differ, give isnull = 0; an end position
+ *     different from s gives isnull = -1.
+ * The end position is read from the stack entry or directly from the
+ * stored value, depending on reg->bt_mem_end for that group.
+ */
+#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ }\
+ }\
+} while(0)
+/*
+ * Nesting-aware variant of STACK_NULL_CHECK_MEMST: START/END entries for
+ * the same id are paired through `level`, and the capture inspection runs
+ * from the START entry reached at level 0.
+ */
+#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ else {\
+ level--;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ if (k->u.null_check.num == (id)) level++;\
+ }\
+ }\
+} while(0)
+/*
+ * STACK_GET_REPEAT(id, k): leave k on the nearest STK_REPEAT entry for
+ * repeat `id` found at call-nesting level 0.  The level is lowered by
+ * STK_CALL_FRAME entries and raised by STK_RETURN entries met on the way
+ * down.
+ */
+#define STACK_GET_REPEAT(id, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
+ if (k->type == STK_REPEAT) {\
+ if (level == 0) {\
+ if (k->u.repeat.num == (id)) {\
+ break;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME) level--;\
+ else if (k->type == STK_RETURN) level++;\
+ }\
+} while (0)
+/*
+ * STACK_RETURN(addr): store in addr the return address of the nearest
+ * STK_CALL_FRAME entry that is not already paired with a STK_RETURN entry
+ * above it.
+ */
+#define STACK_RETURN(addr) do {\
+ int level = 0;\
+ StackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_RETURN"); \
+ if (k->type == STK_CALL_FRAME) {\
+ if (level == 0) {\
+ (addr) = k->u.call_frame.ret_addr;\
+ break;\
+ }\
+ else level--;\
+ }\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ }\
+} while(0)
+
+/*
+ * STRING_CMP(s1, s2, len): byte-wise comparison that jumps to `fail` on
+ * the first difference.  All three arguments are modified in place
+ * (s1 and s2 advance, len counts down).
+ */
+#define STRING_CMP(s1,s2,len) do {\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) goto fail;\
+ }\
+} while(0)
+/* Comparison through string_cmp_ic(); jumps to `fail` when it returns 0. */
+#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+ goto fail; \
+} while(0)
+
+/*
+ * Compare mblen bytes at s1 with the text at *ps2 after passing both
+ * through ONIGENC_MBC_TO_NORMALIZE with ambig_flag.
+ *
+ * Each side is normalised one step at a time into a scratch buffer; the
+ * two normalised chunks must have equal length and equal bytes.
+ * Returns 1 on a full match and stores the advanced second cursor in
+ * *ps2; returns 0 on the first difference and leaves *ps2 untouched.
+ */
+static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
+                         UChar* s1, UChar** ps2, int mblen)
+{
+  UChar norm1[ONIGENC_MBC_NORMALIZE_MAXLEN];
+  UChar norm2[ONIGENC_MBC_NORMALIZE_MAXLEN];
+  UChar* cur2   = *ps2;
+  UChar* limit1 = s1 + mblen;
+  UChar* limit2 = cur2 + mblen;
+  int n1, n2, i;
+
+  while (s1 < limit1) {
+    n1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, limit1, norm1);
+    n2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &cur2, limit2, norm2);
+    if (n1 != n2) return 0;
+
+    for (i = 0; i < n1; i++) {
+      if (norm1[i] != norm2[i]) return 0;
+    }
+  }
+
+  *ps2 = cur2;
+  return 1;
+}
+/*
+ * STRING_CMP_VALUE: like STRING_CMP, but reports the outcome in is_fail
+ * (1 on mismatch, 0 otherwise) instead of jumping.  s1, s2 and len are
+ * modified in place.
+ */
+#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
+ is_fail = 0;\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) {\
+ is_fail = 1; break;\
+ }\
+ }\
+} while(0)
+/* Variant using string_cmp_ic(); is_fail is 1 when it returns 0. */
+#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+ is_fail = 1; \
+ else \
+ is_fail = 0; \
+} while(0)
+
+/* Position predicates relative to the `str` / `end` variables of the expanding scope. */
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
+#define IS_EMPTY_STR (str == end)
+/* Jump to `fail` unless n more bytes are available at s.  NOTE: expands to a bare `if`. */
+#define DATA_ENSURE(n) \
+ if (s + (n) > end) goto fail
+/* Expression form of the same test: non-zero when n bytes are available at s. */
+#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+/*
+ * make_capture_history_tree: build the capture-history subtree of `node`
+ * from the stack entries in [*kp, stk_top).
+ *
+ * For each STK_MEM_START entry whose group is flagged in
+ * reg->capture_history (and is <= ONIG_MAX_CAPTURE_HISTORY_GROUP) a child
+ * node is created, attached to `node`, and filled in recursively starting
+ * from the following entry.  A STK_MEM_END entry for node->group closes
+ * the current node: its end offset is stored, *kp is set to that entry
+ * and 0 is returned.
+ *
+ * Offsets are relative to `str`.  Returns 0 when the node was closed, 1
+ * when stk_top was reached first, ONIGERR_MEMORY when a node cannot be
+ * allocated, or the non-zero result of history_tree_add_child().
+ */
+#ifdef USE_CAPTURE_HISTORY
+static int
+make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
+ StackType* stk_top, UChar* str, regex_t* reg)
+{
+ int n, r;
+ OnigCaptureTreeNode* child;
+ StackType* k = *kp;
+
+ while (k < stk_top) {
+ if (k->type == STK_MEM_START) {
+ n = k->u.mem.num;
+ if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
+ BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ child = history_node_new();
+ CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
+ child->group = n;
+ child->beg = (int )(k->u.mem.pstr - str);
+ r = history_tree_add_child(node, child);
+ if (r != 0) return r; /* NOTE(review): child is not released here -- possible leak, confirm against history_tree_add_child() */
+ *kp = (k + 1);
+ r = make_capture_history_tree(child, kp, stk_top, str, reg);
+ if (r != 0) return r;
+
+ k = *kp; /* the recursion left *kp on the entry that closed the child */
+ child->end = (int )(k->u.mem.pstr - str);
+ }
+ }
+ else if (k->type == STK_MEM_END) {
+ if (k->u.mem.num == node->group) {
+ node->end = (int )(k->u.mem.pstr - str);
+ *kp = k;
+ return 0;
+ }
+ }
+ k++;
+ }
+
+ return 1; /* 1: root node ending. */
+}
+#endif
+
+#ifdef USE_BACKREF_AT_LEVEL
+/*
+ * Return 1 if group number `mem` occurs among the `num` group numbers
+ * read sequentially from memp with GET_MEMNUM_INC, 0 otherwise.
+ */
+static int mem_is_in_memp(int mem, int num, UChar* memp)
+{
+  MemNumType candidate;
+  int remaining = num;
+
+  while (remaining > 0) {
+    GET_MEMNUM_INC(candidate, memp);
+    if ((int )candidate == mem) return 1;
+    remaining--;
+  }
+  return 0;
+}
+/*
+ * backref_match_at_nested_level: match a back-reference against a capture
+ * recorded at call-nesting level `nest`.
+ *
+ * The stack is scanned downward from top - 1 to stk_base; STK_CALL_FRAME
+ * lowers and STK_RETURN raises the current level.  At the requested
+ * level, a STK_MEM_END entry for one of the mem_num groups listed in memp
+ * supplies the capture end; the next STK_MEM_START entry for a listed
+ * group supplies the start.  The captured text is then compared with the
+ * subject at *s (through string_cmp_ic() when ignore_case is non-zero).
+ *
+ * Returns 1 and advances *s past the matched text on success; returns 0
+ * when the capture is longer than the remaining input (send - *s), when
+ * the comparison fails, or when no suitable capture is found.
+ */
+static int backref_match_at_nested_level(regex_t* reg
+ , StackType* top, StackType* stk_base
+ , int ignore_case, int ambig_flag
+ , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+{
+ UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
+ int level;
+ StackType* k;
+
+ level = 0;
+ k = top;
+ k--;
+ while (k >= stk_base) {
+ if (k->type == STK_CALL_FRAME) {
+ level--;
+ }
+ else if (k->type == STK_RETURN) {
+ level++;
+ }
+ else if (level == nest) {
+ if (k->type == STK_MEM_START) {
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pstart = k->u.mem.pstr;
+ if (pend != NULL_UCHARP) {
+ if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
+ p = pstart;
+ ss = *s;
+
+ if (ignore_case != 0) {
+ if (string_cmp_ic(reg->enc, ambig_flag,
+ pstart, &ss, (int )(pend - pstart)) == 0)
+ return 0; /* or goto next_mem; */
+ }
+ else {
+ while (p < pend) {
+ if (*p++ != *ss++) return 0; /* or goto next_mem; */
+ }
+ }
+
+ *s = ss;
+ return 1;
+ }
+ }
+ }
+ else if (k->type == STK_MEM_END) {
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pend = k->u.mem.pstr;
+ }
+ }
+ }
+ k--;
+ }
+
+ return 0;
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
+/*
+ * Ruby-only interrupt handling: lets pending traps run in the middle of a
+ * match and releases matcher resources if the trap handler does not
+ * return normally.  Outside RUBY_PLATFORM the check macro is empty.
+ */
+#ifdef RUBY_PLATFORM
+/* Context shared by trap_exec() and trap_ensure(); state is 0 until trap_exec() completes. */
+typedef struct {
+ int state;
+ regex_t* reg;
+ MatchArg* msa;
+ StackType* stk_base;
+} TrapEnsureArg;
+/* Ensure handler: cleans up only when trap_exec() did not finish (state still 0). */
+static VALUE
+trap_ensure(VALUE arg)
+{
+ TrapEnsureArg* ta = (TrapEnsureArg* )arg;
+
+ if (ta->state == 0) { /* trap_exec() is not normal return */
+ ONIG_STATE_DEC_THREAD(ta->reg);
+ if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
+ xfree(ta->stk_base);
+
+ MATCH_ARG_FREE(*(ta->msa));
+ }
+
+ return Qnil;
+}
+/* Protected body: runs pending traps, then marks the call as completed. */
+static VALUE
+trap_exec(VALUE arg)
+{
+ TrapEnsureArg* ta;
+
+ rb_trap_exec();
+
+ ta = (TrapEnsureArg* )arg;
+ ta->state = 1; /* normal return */
+ return Qnil;
+}
+/* Run trap_exec() under rb_ensure() so trap_ensure() always gets a chance to clean up. */
+extern void
+onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
+{
+ VALUE arg;
+ TrapEnsureArg ta;
+
+ ta.state = 0;
+ ta.reg = reg;
+ ta.msa = msa;
+ ta.stk_base = stk_base;
+ arg = (VALUE )(&ta);
+ rb_ensure(trap_exec, arg, trap_ensure, arg);
+}
+/* Inside the matcher: service a pending trap unless interrupts are prohibited. */
+#define CHECK_INTERRUPT_IN_MATCH_AT do {\
+ if (rb_trap_pending) {\
+ if (! rb_prohibit_interrupt) {\
+ onig_exec_trap(reg, msa, stk_base);\
+ }\
+ }\
+} while (0)
+#else
+#define CHECK_INTERRUPT_IN_MATCH_AT
+#endif /* RUBY_PLATFORM */
+/*
+ * Per-opcode statistics (ONIG_DEBUG_STATISTICS builds only).
+ * GETTIME / TIMEDIFF wrap either gettimeofday() (difference in
+ * microseconds) or times() (difference in tms_utime), selected by
+ * USE_TIMEOFDAY, which is defined unconditionally just below.
+ */
+#ifdef ONIG_DEBUG_STATISTICS
+
+#define USE_TIMEOFDAY
+
+#ifdef USE_TIMEOFDAY
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+static struct timeval ts, te;
+#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
+#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
+ (((te).tv_sec - (ts).tv_sec)*1000000))
+#else
+#ifdef HAVE_SYS_TIMES_H
+#include <sys/times.h>
+#endif
+static struct tms ts, te;
+#define GETTIME(t) times(&(t))
+#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
+#endif
+/* Counters indexed by opcode, plus the opcode currently being timed. */
+static int OpCounter[256];
+static int OpPrevCounter[256];
+static unsigned long OpTime[256];
+static int OpCurr = OP_FINISH;
+static int OpPrevTarget = OP_FAIL;
+static int MaxStackDepth = 0;
+/* Entering an opcode: count it, note when it follows OpPrevTarget, and start the timer. */
+#define STAT_OP_IN(opcode) do {\
+ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
+ OpCurr = opcode;\
+ OpCounter[opcode]++;\
+ GETTIME(ts);\
+} while (0)
+/* Leaving an opcode: add the elapsed time to OpTime[OpCurr]. */
+#define STAT_OP_OUT do {\
+ GETTIME(te);\
+ OpTime[OpCurr] += TIMEDIFF(te, ts);\
+} while (0)
+/* Ruby binding: prints the statistics to stderr. */
+#ifdef RUBY_PLATFORM
+
+/*
+ * :nodoc:
+ */
+static VALUE onig_stat_print(void)
+{
+ onig_print_statistics(stderr);
+ return Qnil;
+}
+#endif
+/* Zero all counters; under RUBY_PLATFORM also register onig_stat_print as a global function. */
+extern void onig_statistics_init(void)
+{
+ int i;
+ for (i = 0; i < 256; i++) {
+ OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
+ }
+ MaxStackDepth = 0;
+
+#ifdef RUBY_PLATFORM
+ rb_define_global_function("onig_stat_print", onig_stat_print, 0);
+#endif
+}
+
+/*
+ * Write the collected per-opcode statistics to f: one line per entry of
+ * OnigOpInfo (up to the entry with a negative opcode) with the execution
+ * count, the "prev" count and the accumulated time, followed by the
+ * maximum stack depth observed.
+ */
+extern void
+onig_print_statistics(FILE* f)
+{
+  int i;
+
+  fprintf(f, " count prev time\n");
+  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+    /* OpTime[] is unsigned long, so it must be printed with %lu. */
+    fprintf(f, "%8d: %8d: %10lu: %s\n",
+            OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+  }
+  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
+}
+/* Statistics build: advance stk and keep MaxStackDepth up to date. */
+#define STACK_INC do {\
+ stk++;\
+ if (stk - stk_base > MaxStackDepth) \
+ MaxStackDepth = stk - stk_base;\
+} while (0)
+
+#else
+#define STACK_INC stk++
+/* Statistics disabled: the opcode hooks expand to nothing. */
+#define STAT_OP_IN(opcode)
+#define STAT_OP_OUT
+#endif
+
+/*
+ * Test whether `code` lies inside one of the code-point ranges stored at p.
+ *
+ * The data begins with a range count (read with GET_CODE_POINT) followed
+ * by that many (from, to) pairs.  A binary search locates the first pair
+ * whose upper bound is not below `code`; the result is 1 when such a pair
+ * exists and its lower bound does not exceed `code`, 0 otherwise.
+ */
+extern int
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
+{
+  OnigCodePoint count, lo, hi, mid;
+  OnigCodePoint* ranges;
+
+  GET_CODE_POINT(count, p);
+  ranges = ((OnigCodePoint* )p) + 1;
+
+  lo = 0;
+  hi = count;
+  while (lo < hi) {
+    mid = (lo + hi) >> 1;
+    if (code > ranges[mid * 2 + 1])
+      lo = mid + 1;
+    else
+      hi = mid;
+  }
+
+  if (lo >= count) return 0;
+  return (code >= ranges[lo * 2]) ? 1 : 0;
+}
+
+/*
+ * Decide whether `code` (encoded in enclen bytes) belongs to character
+ * class cc.
+ *
+ * Codes that fit the single-byte case (enclen <= 1 and code below
+ * SINGLE_BYTE_SIZE) are looked up in the class bitset; all others are
+ * looked up in the multibyte range buffer, and count as absent when the
+ * class has none.  The answer is inverted for a negated class.
+ */
+static int
+is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
+{
+  int hit = 0;
+
+  if (enclen <= 1 && code < SINGLE_BYTE_SIZE) {
+    if (BITSET_AT(cc->bs, code) != 0) hit = 1;
+  }
+  else if (! IS_NULL(cc->mbuf)) {
+    if (onig_is_in_code_range(cc->mbuf->p, code) != 0) hit = 1;
+  }
+
+  return IS_CCLASS_NOT(cc) ? !hit : hit;
+}
+
+/*
+ * Public wrapper around is_code_in_cc(): derives the encoded length of
+ * `code` for encoding enc (fixed at 2 when the encoding's minimum
+ * character length exceeds one byte) and performs the class lookup.
+ */
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int len = (ONIGENC_MBC_MINLEN(enc) > 1)
+              ? 2
+              : ONIGENC_CODE_TO_MBCLEN(enc, code);
+
+  return is_code_in_cc(len, code, cc);
+}
+
+/* Local copies of the POSIX match-offset types: rm_so / rm_eo hold a start and an end offset. */
+/* matching region of POSIX API */
+typedef int regoff_t;
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} posix_regmatch_t;
+
+/* match data(str - end) from position (sstart). */
+/* if sstart == str then set sprev to NULL. */
+static int
+match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
+ UChar* sprev, MatchArg* msa)
+{
+ static UChar FinishCode[] = { OP_FINISH };
+
+ int i, n, num_mem, best_len, pop_level;
+ LengthType tlen, tlen2;
+ MemNumType mem;
+ RelAddrType addr;
+ OnigOptionType option = reg->options;
+ OnigEncoding encode = reg->enc;
+ OnigAmbigType ambig_flag = reg->ambig_flag;
+ UChar *s, *q, *sbegin;
+ UChar *p = reg->p;
+ char *alloca_base;
+ StackType *stk_alloc, *stk_base, *stk, *stk_end;
+ StackType *stkp; /* used as any purpose. */
+ StackIndex si;
+ StackIndex *repeat_stk;
+ StackIndex *mem_start_stk, *mem_end_stk;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int scv;
+ unsigned char* state_check_buff = msa->state_check_buff;
+ int num_comb_exp_check = reg->num_comb_exp_check;
+#endif
+ n = reg->num_repeat + reg->num_mem * 2;
+
+ STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
+ pop_level = reg->stack_pop_level;
+ num_mem = reg->num_mem;
+ repeat_stk = (StackIndex* )alloca_base;
+
+ mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
+ mem_end_stk = mem_start_stk + num_mem;
+ mem_start_stk--; /* for index start from 1,
+ mem_start_stk[1]..mem_start_stk[num_mem] */
+ mem_end_stk--; /* for index start from 1,
+ mem_end_stk[1]..mem_end_stk[num_mem] */
+ for (i = 1; i <= num_mem; i++) {
+ mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
+ }
+
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
+ (int )str, (int )end, (int )sstart, (int )sprev);
+ fprintf(stderr, "size: %d, start offset: %d\n",
+ (int )(end - str), (int )(sstart - str));
+#endif
+
+ STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
+ best_len = ONIG_MISMATCH;
+ s = (UChar* )sstart;
+ while (1) {
+#ifdef ONIG_DEBUG_MATCH
+ {
+ UChar *q, *bp, buf[50];
+ int len;
+ fprintf(stderr, "%4d> \"", (int )(s - str));
+ bp = buf;
+ for (i = 0, q = s; i < 7 && q < end; i++) {
+ len = enc_len(encode, q);
+ while (len-- > 0) *bp++ = *q++;
+ }
+ if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
+ else { xmemcpy(bp, "\"", 1); bp += 1; }
+ *bp = 0;
+ fputs(buf, stderr);
+ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
+ onig_print_compiled_byte_code(stderr, p, NULL, encode);
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ sbegin = s;
+ switch (*p++) {
+ case OP_END: STAT_OP_IN(OP_END);
+ n = s - sstart;
+ if (n > best_len) {
+ OnigRegion* region;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
+ }
+#endif
+ best_len = n;
+ region = msa->region;
+ if (region) {
+#ifdef USE_POSIX_REGION_OPTION
+ if (IS_POSIX_REGION(msa->options)) {
+ posix_regmatch_t* rmt = (posix_regmatch_t* )region;
+
+ rmt[0].rm_so = sstart - str;
+ rmt[0].rm_eo = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
+
+ rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
+ }
+ }
+ }
+ else {
+#endif /* USE_POSIX_REGION_OPTION */
+ region->beg[0] = sstart - str;
+ region->end[0] = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+ }
+
+#ifdef USE_CAPTURE_HISTORY
+ if (reg->capture_history != 0) {
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+ }
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = sstart - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
+#endif /* USE_CAPTURE_HISTORY */
+#ifdef USE_POSIX_REGION_OPTION
+ } /* else IS_POSIX_REGION() */
+#endif
+ } /* if (region) */
+ } /* n > best_len */
+
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ end_best_len:
+#endif
+ STAT_OP_OUT;
+
+ if (IS_FIND_CONDITION(option)) {
+ if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
+ best_len = ONIG_MISMATCH;
+ goto fail; /* for retry */
+ }
+ if (IS_FIND_LONGEST(option) && s < end) {
+ goto fail; /* for retry */
+ }
+ }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
+ break;
+
+ case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
+#if 0
+ DATA_ENSURE(1);
+ if (*p != *s) goto fail;
+ p++; s++;
+#endif
+ if (*p != *s++) goto fail;
+ DATA_ENSURE(0);
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
+ {
+ int len;
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) {
+ goto fail;
+ }
+ p++; q++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACT2: STAT_OP_IN(OP_EXACT2);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT3: STAT_OP_IN(OP_EXACT3);
+ DATA_ENSURE(3);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT4: STAT_OP_IN(OP_EXACT4);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT5: STAT_OP_IN(OP_EXACT5);
+ DATA_ENSURE(5);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN: STAT_OP_IN(OP_EXACTN);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen);
+ while (tlen-- > 0) {
+ if (*p++ != *s++) goto fail;
+ }
+ sprev = s - 1;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
+ {
+ int len;
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
+
+ while (p < endp) {
+ sprev = s;
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) {
+ goto fail;
+ }
+ p++; q++;
+ }
+ }
+ }
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3);
+ DATA_ENSURE(6);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 2);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 2;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 3);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 3;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN);
+ GET_LENGTH_INC(tlen, p); /* mb-len */
+ GET_LENGTH_INC(tlen2, p); /* string len */
+ tlen2 *= tlen;
+ DATA_ENSURE(tlen2);
+ while (tlen2-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - tlen;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_CCLASS: STAT_OP_IN(OP_CCLASS);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
+ p += SIZE_BITSET;
+ s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
+
+ cclass_mb:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len;
+
+ DATA_ENSURE(1);
+ mb_len = enc_len(encode, s);
+ DATA_ENSURE(mb_len);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (! onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (! onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
+ p += SIZE_BITSET;
+ goto cclass_mb;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) == 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
+ p += SIZE_BITSET;
+ s += enc_len(encode, s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
+ s++;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ cclass_mb_not:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enc_len(encode, s);
+
+ if (s + mb_len > end) {
+ DATA_ENSURE(1);
+ s = (UChar* )end;
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+
+ cc_mb_not_success:
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
+ p += SIZE_BITSET;
+ goto cclass_mb_not;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) != 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
+ {
+ OnigCodePoint code;
+ void *node;
+ int mb_len;
+ UChar *ss;
+
+ DATA_ENSURE(1);
+ GET_POINTER_INC(node, p);
+ mb_len = enc_len(encode, s);
+ ss = s;
+ s += mb_len;
+ DATA_ENSURE(0);
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ s += n;
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ s += n;
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
+ while (s < end) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enc_len(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ while (s < end) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ while (s < end) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enc_len(encode, s);
+ if (n >1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
+ STAT_OP_OUT;
+ break;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+ case OP_WORD: STAT_OP_IN(OP_WORD);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enc_len(encode, s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enc_len(encode, s);
+ STAT_OP_OUT;
+ break;
+
+ case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_WORD_BEGIN_END
+ case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
+ if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+
+ case OP_WORD_END: STAT_OP_IN(OP_WORD_END);
+ if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+#endif
+
+ case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF);
+ if (! ON_STR_BEGIN(s)) goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_END_BUF: STAT_OP_IN(OP_END_BUF);
+ if (! ON_STR_END(s)) goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE);
+ if (ON_STR_BEGIN(s)) {
+ if (IS_NOTBOL(msa->options)) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
+ STAT_OP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail;
+ STAT_OP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+#endif
+ goto fail;
+ break;
+
+ case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
+ STAT_OP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
+ ON_STR_END(s + enc_len(encode, s))) {
+ STAT_OP_OUT;
+ continue;
+ }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ UChar* ss = s + enc_len(encode, s);
+ if (ON_STR_END(ss + enc_len(encode, ss))) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+#endif
+ goto fail;
+ break;
+
+ case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION);
+ if (s != msa->start)
+ goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_START(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
+ GET_MEMNUM_INC(mem, p);
+ mem_start_stk[mem] = (StackIndex )((void* )s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_END(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
+ GET_MEMNUM_INC(mem, p);
+ STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
+ STACK_PUSH_MEM_END(mem, s);
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (StackIndex )((void* )s);
+ STACK_GET_MEM_START(mem, stkp);
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ else
+ mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+
+ STACK_PUSH_MEM_END_MARK(mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1);
+ mem = 1;
+ goto backref;
+ break;
+
+ case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2);
+ mem = 2;
+ goto backref;
+ break;
+
+ case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
+ GET_MEMNUM_INC(mem, p);
+ backref:
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(pstart, s, n);
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC);
+ GET_MEMNUM_INC(mem, p);
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(ambig_flag, pstart, &s, n);
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(pstart, swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ STAT_OP_OUT;
+ continue;
+ }
+ break;
+
+#ifdef USE_BACKREF_AT_LEVEL
+ case OP_BACKREF_AT_LEVEL:
+ {
+ int len;
+ OnigOptionType ic;
+ LengthType level;
+
+ GET_OPTION_INC(ic, p);
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
+
+ sprev = s;
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
+ , (int )level, (int )tlen, p, &s, end)) {
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ }
+
+ break;
+#endif
+
+ case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
+ GET_OPTION_INC(option, p);
+ STACK_PUSH_ALT(p, s, sprev);
+ p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
+ GET_OPTION_INC(option, p);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START);
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_PUSH_NULL_CHECK_START(mem, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK(isnull, mem, s);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch (*p++) {
+ case OP_JUMP:
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ p += SIZE_MEMNUM;
+ break;
+ default:
+ goto unexpected_bytecode_error;
+ break;
+ }
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+#else
+ STACK_NULL_CHECK_REC(isnull, mem, s);
+#endif
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_JUMP: STAT_OP_IN(OP_JUMP);
+ GET_RELADDR_INC(addr, p);
+ p += addr;
+ STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_PUSH: STAT_OP_IN(OP_PUSH);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ GET_RELADDR_INC(addr, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) {
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_STATE_CHECK(s, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+ case OP_POP: STAT_OP_IN(OP_POP);
+ STACK_POP_ONE;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s && DATA_ENSURE_CHECK(1)) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ }
+ p += (addr + 1);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ }
+ p++;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT: STAT_OP_IN(OP_REPEAT);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p += addr;
+ }
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+
+ repeat_inc:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
+ /* end of repeat. Nothing to do. */
+ }
+ else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ }
+ STACK_PUSH_REPEAT_INC(si);
+ STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc;
+ break;
+
+ case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+
+ repeat_inc_ng:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc_ng;
+ break;
+
+ case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
+ STACK_PUSH_POS(s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_POP_POS: STAT_OP_IN(OP_POP_POS);
+ {
+ STACK_POS_END(stkp);
+ s = stkp->u.state.pstr;
+ sprev = stkp->u.state.pstr_prev;
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_POS_NOT(p + addr, s, sprev);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS);
+ STACK_POP_TIL_POS_NOT;
+ goto fail;
+ break;
+
+ case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT);
+ STACK_PUSH_STOP_BT;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT);
+ STACK_STOP_BT_END;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
+ GET_LENGTH_INC(tlen, p);
+ s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ if (IS_NULL(s)) goto fail;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
+ GET_RELADDR_INC(addr, p);
+ GET_LENGTH_INC(tlen, p);
+ q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ if (IS_NULL(q)) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ p += addr;
+ /* goto fail; */
+ }
+ else {
+ STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
+ s = q;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
+ STACK_POP_TIL_LOOK_BEHIND_NOT;
+ goto fail;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_CALL: STAT_OP_IN(OP_CALL);
+ GET_ABSADDR_INC(addr, p);
+ STACK_PUSH_CALL_FRAME(p);
+ p = reg->p + addr;
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_RETURN: STAT_OP_IN(OP_RETURN);
+ STACK_RETURN(p);
+ STACK_PUSH_RETURN;
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_FINISH:
+ goto finish;
+ break;
+
+ fail:
+ STAT_OP_OUT;
+ /* fall */
+ case OP_FAIL: STAT_OP_IN(OP_FAIL);
+ STACK_POP;
+ p = stk->u.state.pcode;
+ s = stk->u.state.pstr;
+ sprev = stk->u.state.pstr_prev;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (stk->u.state.state_check != 0) {
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
+ }
+#endif
+
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ default:
+ goto bytecode_error;
+
+ } /* end of switch */
+ sprev = sbegin;
+ } /* end of while(1) */
+
+ finish:
+ STACK_SAVE;
+ return best_len;
+
+#ifdef ONIG_DEBUG
+ stack_error:
+ STACK_SAVE;
+ return ONIGERR_STACK_BUG;
+#endif
+
+ bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNDEFINED_BYTECODE;
+
+ unexpected_bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNEXPECTED_BYTECODE;
+}
+
+
+/*
+ * Plain forward byte search for [target, target_end) inside the text.
+ *
+ * Candidate positions start at `text` and advance one encoded character
+ * (enc_len) at a time.  A candidate is only tried while it lies before both
+ * `text_range` and the last position at which the whole target still fits
+ * before `text_end`.
+ *
+ * Returns a pointer to the first matching position, or NULL if none.
+ */
+static UChar*
+slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
+            const UChar* text, const UChar* text_end, UChar* text_range)
+{
+  UChar *cand, *limit, *tp, *sp;
+
+  /* one past the last start position that leaves room for the target */
+  limit = (UChar* )text_end - (target_end - target - 1);
+  if (limit > text_range)
+    limit = text_range;
+
+  for (cand = (UChar* )text; cand < limit; cand += enc_len(enc, cand)) {
+    if (*cand != *target)
+      continue;
+
+    /* first byte agrees; compare the remaining bytes */
+    tp = target + 1;
+    sp = cand + 1;
+    while (tp < target_end && *tp == *sp) {
+      tp++;
+      sp++;
+    }
+    if (tp == target_end)
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Compare the bytes [t, tend) against the normalized form of the text
+ * starting at p.
+ *
+ * The text is consumed one unit at a time through ONIGENC_MBC_TO_NORMALIZE,
+ * which advances p and writes `lowlen` bytes into lowbuf; those bytes are
+ * then compared with the next bytes of t.
+ *
+ * Returns 1 when every byte up to tend agreed, 0 on the first mismatch.
+ *
+ * NOTE(review): the inner loop does not re-check t against tend, and p is
+ * not checked against end before normalizing; callers presumably guarantee
+ * that enough text is available -- confirm.
+ */
+static int
+str_lower_case_match(OnigEncoding enc, int ambig_flag,
+                     const UChar* t, const UChar* tend,
+                     const UChar* p, const UChar* end)
+{
+  int lowlen;
+  UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+
+  while (t < tend) {
+    lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
+    q = lowbuf;
+    while (lowlen > 0) {
+      if (*t++ != *q++) {
+        return 0;
+      }
+      lowlen--;
+    }
+  }
+
+  return 1;
+}
+
+/*
+ * Forward search that compares through str_lower_case_match() instead of
+ * raw bytes.
+ *
+ * Candidates run from `text` up to (but not including) the smaller of
+ * `text_range` and text_end - (target length - 1), stepping by enc_len().
+ *
+ * Returns the first candidate accepted by str_lower_case_match(), or NULL.
+ */
+static UChar*
+slow_search_ic(OnigEncoding enc, int ambig_flag,
+               UChar* target, UChar* target_end,
+               const UChar* text, const UChar* text_end, UChar* text_range)
+{
+  UChar *cand;
+  UChar *limit;
+
+  limit = (UChar* )text_end - (target_end - target - 1);
+  if (limit > text_range)
+    limit = text_range;
+
+  for (cand = (UChar* )text; cand < limit; cand += enc_len(enc, cand)) {
+    if (str_lower_case_match(enc, ambig_flag, target, target_end,
+                             cand, text_end))
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Plain backward byte search for [target, target_end).
+ *
+ * The first candidate is text_end minus the target length, clamped down to
+ * `text_start`; when it is not clamped it is moved to a character head with
+ * ONIGENC_LEFT_ADJUST_CHAR_HEAD.  Candidates then step to the previous
+ * character head (relative to `adjust_text`) for as long as they stay at or
+ * after `text`.
+ *
+ * Returns the first matching position found, or NULL.
+ */
+static UChar*
+slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
+                     const UChar* text, const UChar* adjust_text,
+                     const UChar* text_end, const UChar* text_start)
+{
+  UChar *cand, *tp, *sp;
+
+  cand = (UChar* )text_end - (target_end - target);
+  if (cand > text_start)
+    cand = (UChar* )text_start;
+  else
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, cand);
+
+  for ( ; cand >= text;
+       cand = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, cand)) {
+    if (*cand != *target)
+      continue;
+
+    /* first byte agrees; compare the remaining bytes */
+    tp = target + 1;
+    sp = cand + 1;
+    while (tp < target_end && *tp == *sp) {
+      tp++;
+      sp++;
+    }
+    if (tp == target_end)
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Backward search that compares through str_lower_case_match().
+ *
+ * Starting position and stepping are the same as in slow_search_backward():
+ * begin at text_end minus the target length (clamped to `text_start`, or
+ * left-adjusted to a character head), then walk to previous character heads
+ * while the candidate is at or after `text`.
+ *
+ * Returns the first accepted position, or NULL.
+ */
+static UChar*
+slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+                        UChar* target, UChar* target_end,
+                        const UChar* text, const UChar* adjust_text,
+                        const UChar* text_end, const UChar* text_start)
+{
+  UChar *cand;
+
+  cand = (UChar* )text_end - (target_end - target);
+  if (cand > text_start)
+    cand = (UChar* )text_start;
+  else
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, cand);
+
+  for ( ; cand >= text;
+       cand = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, cand)) {
+    if (str_lower_case_match(enc, ambig_flag,
+                             target, target_end, cand, text_end))
+      return cand;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Skip-table forward search that advances by whole encoded characters.
+ *
+ * At each candidate `s` the target is compared from its last byte backwards.
+ * On a mismatch the skip distance is looked up for the text byte aligned
+ * with the target's last byte (reg->int_map when it is set, reg->map
+ * otherwise), and `s` is advanced with enc_len() until it has moved at least
+ * that far or reaches the end of the search window.
+ *
+ * The search window ends at `text_range`, pulled back so that the target
+ * never extends beyond `text_end`.
+ *
+ * Returns the matching position, or NULL.
+ *
+ * The compare loop checks `t == target` before stepping back, so no pointer
+ * below the start of the target is ever formed.
+ */
+static UChar*
+bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
+                 const UChar* text, const UChar* text_end,
+                 const UChar* text_range)
+{
+  const UChar *s, *se, *t, *p, *end;
+  const UChar *tail;
+  int skip, tlen1;
+
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
+          (int )text, (int )text_end, (int )text_range);
+#endif
+
+  tlen1 = (int )(target_end - target) - 1;
+  end = text_range;
+  if (end + tlen1 > text_end)
+    end = text_end - tlen1;
+
+  /* an empty target matches at the first candidate, as it always did */
+  if (tlen1 < 0)
+    return (text < end) ? (UChar* )text : (UChar* )NULL;
+
+  tail = target + tlen1;
+  s = text;
+
+  while (s < end) {
+    p = se = s + tlen1;
+    t = tail;
+    while (*p == *t) {
+      if (t == target) return (UChar* )s;
+      p--; t--;
+    }
+
+    skip = IS_NULL(reg->int_map) ? reg->map[*se] : reg->int_map[*se];
+    t = s;
+    do {
+      s += enc_len(reg->enc, s);
+    } while ((s - t) < skip && s < end);
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Skip-table forward search operating on raw bytes.
+ *
+ * `s` points at the text byte aligned with the last byte of the target.
+ * The target is compared backwards from there; on a mismatch `s` jumps
+ * forward by the table entry for *s (reg->int_map when it is set, reg->map
+ * otherwise).
+ *
+ * The scan stops once `s` reaches text_range + (target length - 1), capped
+ * at `text_end`.
+ *
+ * Returns the position where the match starts, or NULL.
+ *
+ * The compare loop checks `t == target` before stepping back, so neither
+ * `t` nor `p` is moved below the start of its buffer.
+ */
+static UChar*
+bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
+          const UChar* text, const UChar* text_end, const UChar* text_range)
+{
+  const UChar *s, *t, *p, *end;
+  const UChar *tail;
+
+  end = text_range + (target_end - target) - 1;
+  if (end > text_end)
+    end = text_end;
+
+  /* an empty target matches at `text`, as it always did */
+  if (target_end == target)
+    return (text <= end) ? (UChar* )text : (UChar* )NULL;
+
+  tail = target_end - 1;
+  s = text + (target_end - target) - 1;
+
+  while (s < end) {
+    p = s;
+    t = tail;
+    while (*p == *t) {
+      if (t == target) return (UChar* )p;
+      p--; t--;
+    }
+    /* see int_map[] */
+    s += IS_NULL(reg->int_map) ? reg->map[*s] : reg->int_map[*s];
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Fill the backward skip table for the byte string [s, end).
+ *
+ * If *skip is NULL a table of ONIG_CHAR_TABLE_SIZE ints is allocated with
+ * xmalloc and stored in *skip.  Every entry is first set to the string
+ * length; then, for each byte at index i >= 1, the entry for that byte is
+ * set to i, visiting indexes from high to low so the smallest index wins.
+ * The byte at index 0 is not entered.
+ *
+ * `enc` is not used by this function.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY when the allocation fails.
+ */
+static int
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
+{
+  int *table;
+  int pos, len;
+
+  table = *skip;
+  if (IS_NULL(table)) {
+    table = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+    if (IS_NULL(table)) return ONIGERR_MEMORY;
+    *skip = table;
+  }
+
+  len = end - s;
+  for (pos = 0; pos < ONIG_CHAR_TABLE_SIZE; pos++)
+    table[pos] = len;
+
+  /* walk from the last byte down to index 1 */
+  pos = len;
+  while (--pos > 0)
+    table[s[pos]] = pos;
+
+  return 0;
+}
+
+/*
+ * Backward search driven by reg->int_map_backward.
+ *
+ * The first candidate is text_end minus the target length, clamped down to
+ * `text_start` (or left-adjusted to a character head when not clamped).
+ * At each candidate the target is compared forwards; on a mismatch the
+ * candidate moves back by the table entry for its first byte and is then
+ * re-aligned with ONIGENC_LEFT_ADJUST_CHAR_HEAD.  The scan ends when the
+ * candidate falls before `text`.
+ *
+ * Returns the matching position, or NULL.
+ */
+static UChar*
+bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
+                   const UChar* text, const UChar* adjust_text,
+                   const UChar* text_end, const UChar* text_start)
+{
+  const UChar *cand, *tp, *sp;
+
+  cand = text_end - (target_end - target);
+  if (cand > text_start)
+    cand = text_start;
+  else
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand);
+
+  while (cand >= text) {
+    tp = target;
+    for (sp = cand; tp < target_end && *sp == *tp; sp++)
+      tp++;
+
+    if (tp == target_end)
+      return (UChar* )cand;
+
+    cand -= reg->int_map_backward[*cand];
+    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand);
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Scan forward from `text`, one encoded character at a time, and return the
+ * first position before `text_range` whose leading byte has a non-zero
+ * entry in `map`.  Returns NULL when no such position exists.
+ */
+static UChar*
+map_search(OnigEncoding enc, UChar map[],
+           const UChar* text, const UChar* text_range)
+{
+  const UChar *cur;
+
+  for (cur = text; cur < text_range; cur += enc_len(enc, cur)) {
+    if (map[*cur])
+      return (UChar* )cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Scan backward from `text_start`, stepping to the previous character head
+ * (relative to `adjust_text`), and return the first position at or after
+ * `text` whose leading byte has a non-zero entry in `map`.  Returns NULL
+ * when no such position exists.
+ */
+static UChar*
+map_search_backward(OnigEncoding enc, UChar map[],
+                    const UChar* text, const UChar* adjust_text,
+                    const UChar* text_start)
+{
+  const UChar *cur;
+
+  for (cur = text_start; cur >= text;
+       cur = onigenc_get_prev_char_head(enc, adjust_text, cur)) {
+    if (map[*cur])
+      return (UChar* )cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Try to match `reg` against the string [str, end) at the single position
+ * `at`.
+ *
+ * When `region` is given (and, with USE_POSIX_REGION_OPTION, the option is
+ * not a POSIX region) it is resized and cleared for reg->num_mem + 1
+ * entries first; a non-zero result from that step is returned without
+ * attempting the match.  Otherwise the value of match_at() is returned.
+ *
+ * With USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM the function first
+ * waits for the regex state to reach ONIG_STATE_NORMAL and gives up with
+ * ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT after THREAD_PASS_LIMIT_COUNT passes.
+ */
+extern int
+onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
+           OnigOptionType option)
+{
+  int r;
+  UChar *at_prev;
+  MatchArg msa;
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ start:
+  THREAD_ATOMIC_START;
+  if (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+    int n;
+
+    /* not usable yet: leave the atomic section and wait */
+    THREAD_ATOMIC_END;
+    n = 0;
+    while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+      if (++n > THREAD_PASS_LIMIT_COUNT)
+        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+      THREAD_PASS;
+    }
+    goto start;
+  }
+
+  ONIG_STATE_INC(reg);
+  if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+    onig_chain_reduce(reg);
+    ONIG_STATE_INC(reg);
+  }
+  THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+  MATCH_ARG_INIT(msa, option, region, at);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  {
+    int offset = at - str;
+    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+  }
+#endif
+
+  r = 0;
+  if (region
+#ifdef USE_POSIX_REGION_OPTION
+      && !IS_POSIX_REGION(option)
+#endif
+      ) {
+    r = onig_region_resize_clear(region, reg->num_mem + 1);
+  }
+
+  if (r == 0) {
+    at_prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
+    r = match_at(reg, str, end, at, at_prev, &msa);
+  }
+
+  MATCH_ARG_FREE(msa);
+  ONIG_STATE_DEC_THREAD(reg);
+  return r;
+}
+
+/*
+ * Use the regex's optimization data (exact string or byte map) to find the
+ * next place in [s, range) where a match could start.
+ *
+ * The search begins reg->dmin bytes after `s` (advanced by whole characters
+ * for multibyte encodings).  A hit is rejected and the search retried from
+ * the following character when it lies closer than dmin to `s`, or when it
+ * does not satisfy reg->sub_anchor (begin-line / end-line).
+ *
+ * On success:
+ *   *high     = hit - dmin (used by the caller as a range check only)
+ *   *low      = hit when dmax == 0, or hit - dmax adjusted to a character
+ *               head; left untouched when dmax is ONIG_INFINITE_DISTANCE
+ *   *low_prev = character head preceding *low, when low_prev is non-NULL
+ *               and *low was set
+ *
+ * Returns 1 on success, 0 when nothing was found.
+ *
+ * In the multibyte case the function fails early when s + dmin is at or
+ * beyond `end`, so enc_len() is never applied to bytes outside the string.
+ */
+static int
+forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
+                     UChar* range, UChar** low, UChar** high, UChar** low_prev)
+{
+  UChar *p, *pprev = (UChar* )NULL;
+
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
+          (int )str, (int )end, (int )s, (int )range);
+#endif
+
+  p = s;
+  if (reg->dmin > 0) {
+    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
+      p += reg->dmin;
+    }
+    else {
+      UChar *q = p + reg->dmin;
+
+      /* the minimum distance already leaves the string: nothing to find,
+         and stepping with enc_len() would read past `end` */
+      if (q >= end) return 0; /* fail */
+      while (p < q) p += enc_len(reg->enc, p);
+    }
+  }
+
+ retry:
+  switch (reg->optimize) {
+  case ONIG_OPTIMIZE_EXACT:
+    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
+    break;
+  case ONIG_OPTIMIZE_EXACT_IC:
+    p = slow_search_ic(reg->enc, reg->ambig_flag,
+                       reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM:
+    p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+    p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_MAP:
+    p = map_search(reg->enc, reg->map, p, range);
+    break;
+  }
+
+  if (p && p < range) {
+    /* hit is closer to s than the minimum distance allows: try further on */
+    if (p - reg->dmin < s) {
+    retry_gate:
+      pprev = p;
+      p += enc_len(reg->enc, p);
+      goto retry;
+    }
+
+    if (reg->sub_anchor) {
+      UChar* prev;
+
+      switch (reg->sub_anchor) {
+      case ANCHOR_BEGIN_LINE:
+        if (!ON_STR_BEGIN(p)) {
+          prev = onigenc_get_prev_char_head(reg->enc,
+                                            (pprev ? pprev : str), p);
+          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+            goto retry_gate;
+        }
+        break;
+
+      case ANCHOR_END_LINE:
+        if (ON_STR_END(p)) {
+          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
+                                            (pprev ? pprev : str), p);
+          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+            goto retry_gate;
+        }
+        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                )
+          goto retry_gate;
+        break;
+      }
+    }
+
+    if (reg->dmax == 0) {
+      *low = p;
+      if (low_prev) {
+        if (*low > s)
+          *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
+        else
+          *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                 (pprev ? pprev : str), p);
+      }
+    }
+    else {
+      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+        *low = p - reg->dmax;
+        if (*low > s) {
+          *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+                                              *low, (const UChar** )low_prev);
+          if (low_prev && IS_NULL(*low_prev))
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : s), *low);
+        }
+        else {
+          if (low_prev)
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : str), *low);
+        }
+      }
+    }
+    /* no need to adjust *high; *high is used as a range check only */
+    *high = p - reg->dmin;
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr,
+    "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
+            (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+#endif
+    return 1; /* success */
+  }
+
+  return 0; /* fail */
+}
+
+static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
+ int** skip));
+/* The prototype above declares set_bm_backward_skip(), which fills the
+ * backward skip table.  The threshold below is compared against
+ * (s - range) in backward_search_range(): when that distance is smaller
+ * and no backward table exists yet, the plain backward search is used
+ * instead of building the table. */
+#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
+
+/*
+ * Backward counterpart of forward_search_range(): locate the optimization
+ * target of reg (exact string or character map) by searching backward from s,
+ * and derive from the hit the window [*low, *high] of match start candidates.
+ *
+ * Returns 1 on success, 0 when nothing is found, or the non-zero value
+ * returned by set_bm_backward_skip() when building the skip table fails.
+ *
+ * *low and *high are written only when reg->dmax != ONIG_INFINITE_DISTANCE.
+ */
+static int
+backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
+ UChar* s, const UChar* range, UChar* adjrange,
+ UChar** low, UChar** high)
+{
+ int r;
+ UChar *p;
+
+ /* shift the lower search bound by the minimum distance (dmin) */
+ range += reg->dmin;
+ p = s;
+
+ /* re-entered with a smaller p whenever a sub-anchor check rejects a hit */
+ retry:
+ switch (reg->optimize) {
+ case ONIG_OPTIMIZE_EXACT:
+ exact_method:
+ p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
+ range, adjrange, end, p);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_IC:
+ p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+ reg->exact, reg->exact_end,
+ range, adjrange, end, p);
+ break;
+
+ case ONIG_OPTIMIZE_EXACT_BM:
+ case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ /* the backward skip table is created on first use; for a short scan
+ distance the plain backward search is used instead */
+ if (IS_NULL(reg->int_map_backward)) {
+ if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
+ goto exact_method;
+
+ r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
+ &(reg->int_map_backward));
+ if (r) return r;
+ }
+ p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
+ end, p);
+ break;
+
+ case ONIG_OPTIMIZE_MAP:
+ p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
+ break;
+ }
+
+ if (p) {
+ /* a hit must also satisfy the line anchor recorded in sub_anchor;
+ otherwise step back one character and search again */
+ if (reg->sub_anchor) {
+ UChar* prev;
+
+ switch (reg->sub_anchor) {
+ case ANCHOR_BEGIN_LINE:
+ /* accepted at string begin or right after a newline */
+ if (!ON_STR_BEGIN(p)) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, p);
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ break;
+
+ case ANCHOR_END_LINE:
+ if (ON_STR_END(p)) {
+ /* at string end: rejected when the preceding character is a newline */
+ prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(prev)) goto fail;
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+ p = prev;
+ goto retry;
+ }
+ }
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+ ) {
+ /* not at string end and not on a line terminator: rejected */
+ p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ if (IS_NULL(p)) goto fail;
+ goto retry;
+ }
+ break;
+ }
+ }
+
+ /* no need to adjust *low; *high is right-adjusted to a character head */
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ *low = p - reg->dmax;
+ *high = p - reg->dmin;
+ *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ /* NOTE(review): *low / *high are not assigned above when dmax is
+ ONIG_INFINITE_DISTANCE, yet they are printed here -- confirm */
+ fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+ (int )(*low - str), (int )(*high - str));
+#endif
+ return 1; /* success */
+ }
+
+ fail:
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "backward_search_range: fail.\n");
+#endif
+ return 0; /* fail */
+}
+
+
+/*
+ * Search str..end for a match of reg, trying start positions from start
+ * toward range: forward when range > start, backward otherwise.
+ *
+ * Returns the offset (s - str) of the position where match_at() succeeded,
+ * ONIG_MISMATCH when no position matches, or a negative error code coming
+ * from onig_region_resize_clear() / match_at().
+ *
+ * region, when given (and not a POSIX region), is resized and cleared for
+ * reg->num_mem + 1 entries before searching.
+ */
+extern int
+onig_search(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
+{
+ int r;
+ UChar *s, *prev;
+ MatchArg msa;
+ const UChar *orig_start = start;
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ /* register this search on reg->state; while the state is below
+ ONIG_STATE_NORMAL, yield up to THREAD_PASS_LIMIT_COUNT times and retry */
+ start:
+ THREAD_ATOMIC_START;
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_chain_reduce(reg);
+ ONIG_STATE_INC(reg);
+ }
+ }
+ else {
+ int n;
+
+ THREAD_ATOMIC_END;
+ n = 0;
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ goto start;
+ }
+ THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr,
+ "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+ (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ if (region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
+ if (r) goto finish_no_msa;
+ }
+
+ /* start must lie inside [str, end] */
+ if (start > end || start < str) goto mismatch_no_msa;
+
+/* MATCH_AND_RETURN_CHECK: try match_at() at s; jump to "match" on success
+ (with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE and the FIND_LONGEST option the
+ scan continues instead), jump to "finish" on an error, fall through on
+ ONIG_MISMATCH. */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
+#endif
+
+ /* anchor optimize: resume search range */
+ if (reg->anchor != 0 && str < end) {
+ UChar *min_semi_end, *max_semi_end;
+
+ if (reg->anchor & ANCHOR_BEGIN_POSITION) {
+ /* search start-position only */
+ begin_position:
+ if (range > start)
+ range = start + 1;
+ else
+ range = start;
+ }
+ else if (reg->anchor & ANCHOR_BEGIN_BUF) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
+ }
+ else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ }
+ else
+ goto mismatch_no_msa;
+ }
+ }
+ else if (reg->anchor & ANCHOR_END_BUF) {
+ min_semi_end = max_semi_end = (UChar* )end;
+
+ /* narrow start/range using anchor_dmin/anchor_dmax measured from the
+ (semi-)end of the string; also entered from ANCHOR_SEMI_END_BUF */
+ end_buf:
+ if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
+ goto mismatch_no_msa;
+
+ if (range > start) {
+ if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
+ else { /* match with empty at end */
+ start = onigenc_get_prev_char_head(reg->enc, str, end);
+ }
+ }
+ if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = max_semi_end - reg->anchor_dmin + 1;
+ }
+
+ if (start >= range) goto mismatch_no_msa;
+ }
+ else {
+ if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
+ }
+ if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+ start = max_semi_end - reg->anchor_dmin;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ }
+ if (range > start) goto mismatch_no_msa;
+ }
+ }
+ else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
+ UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+
+ /* when the string ends with a newline, the semi-end may also be the
+ position just before that newline */
+ max_semi_end = (UChar* )end;
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
+#endif
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
+ }
+ else {
+ min_semi_end = (UChar* )end;
+ goto end_buf;
+ }
+ }
+ else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
+ goto begin_position;
+ }
+ }
+ else if (str == end) { /* empty string */
+ static const UChar* address_for_empty_string = (UChar* )"";
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search: empty string.\n");
+#endif
+
+ /* only a pattern with threshold_len == 0 is tried against the empty
+ string; str/start/end are redirected to a static "" for that attempt */
+ if (reg->threshold_len == 0) {
+ start = end = str = address_for_empty_string;
+ s = (UChar* )start;
+ prev = (UChar* )NULL;
+
+ MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ msa.state_check_buff = (void* )0;
+ msa.state_check_buff_size = 0;
+#endif
+ MATCH_AND_RETURN_CHECK;
+ goto mismatch;
+ }
+ goto mismatch_no_msa;
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
+ (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ /* from here on msa is initialized; exits must go through the labels that
+ call MATCH_ARG_FREE (mismatch / finish / match) */
+ MATCH_ARG_INIT(msa, option, region, orig_start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ {
+ int offset = (MIN(start, range) - str);
+ STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+ }
+#endif
+
+ s = (UChar* )start;
+ if (range > start) { /* forward search */
+ if (s > str)
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ else
+ prev = (UChar* )NULL;
+
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *sch_range, *low, *high, *low_prev;
+
+ /* sch_range: upper bound handed to forward_search_range(); extended
+ by dmax (to end when dmax is infinite) and clamped to end */
+ sch_range = (UChar* )range;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_range = (UChar* )end;
+ else {
+ sch_range += reg->dmax;
+ if (sch_range > end) sch_range = (UChar* )end;
+ }
+ }
+
+ if ((end - start) < reg->threshold_len)
+ goto mismatch;
+
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ /* finite dmax: attempt matches only at positions in [low, high]
+ reported by forward_search_range() */
+ do {
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, &low_prev)) goto mismatch;
+ if (s < low) {
+ s = low;
+ prev = low_prev;
+ }
+ while (s <= high) {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, s);
+ }
+ } while (s < range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) goto mismatch;
+
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ /* after a failed attempt, skip ahead to the position following
+ the next newline before trying again */
+ do {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, s);
+
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
+ prev = s;
+ s += enc_len(reg->enc, s);
+ }
+ } while (s < range);
+ goto mismatch;
+ }
+ }
+ }
+
+ /* plain forward scan: one attempt per character position */
+ do {
+ MATCH_AND_RETURN_CHECK;
+ prev = s;
+ s += enc_len(reg->enc, s);
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ MATCH_AND_RETURN_CHECK;
+ }
+ }
+ else { /* backward search */
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *low, *high, *adjrange, *sch_start;
+
+ /* adjrange: range moved left onto a character head (end when
+ range is not below end) */
+ if (range < end)
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ else
+ adjrange = (UChar* )end;
+
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+ (end - range) >= reg->threshold_len) {
+ /* finite dmax: attempt matches only at positions in [low, high]
+ reported by backward_search_range() */
+ do {
+ sch_start = s + reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0)
+ goto mismatch;
+
+ if (s > high)
+ s = high;
+
+ while (s >= low) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ MATCH_AND_RETURN_CHECK;
+ s = prev;
+ }
+ } while (s >= range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if ((end - range) < reg->threshold_len) goto mismatch;
+
+ sch_start = s;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_start = (UChar* )end;
+ else {
+ sch_start += reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ else
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start);
+ }
+ }
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0) goto mismatch;
+ }
+ }
+
+ /* plain backward scan: one attempt per character position */
+ do {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ MATCH_AND_RETURN_CHECK;
+ s = prev;
+ } while (s >= range);
+ }
+
+ mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ /* with FIND_LONGEST the best candidate recorded in msa is the result */
+ if (IS_FIND_LONGEST(reg->options)) {
+ if (msa.best_len >= 0) {
+ s = msa.best_s;
+ goto match;
+ }
+ }
+#endif
+ r = ONIG_MISMATCH;
+
+ finish:
+ MATCH_ARG_FREE(msa);
+ ONIG_STATE_DEC_THREAD(reg);
+
+ /* If the result is a mismatch and the FIND_NOT_EMPTY option is not given,
+ then the region is not set in match_at(). */
+ if (IS_FIND_NOT_EMPTY(reg->options) && region
+#ifdef USE_POSIX_REGION_OPTION
+ && !IS_POSIX_REGION(option)
+#endif
+ ) {
+ onig_region_clear(region);
+ }
+
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ /* exits taken before msa has been initialized: nothing to free */
+ mismatch_no_msa:
+ r = ONIG_MISMATCH;
+ finish_no_msa:
+ ONIG_STATE_DEC_THREAD(reg);
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ match:
+ ONIG_STATE_DEC_THREAD(reg);
+ MATCH_ARG_FREE(msa);
+ return s - str;
+}
+
+/* Return the encoding stored in reg (reg->enc). */
+extern OnigEncoding
+onig_get_encoding(regex_t* reg)
+{
+  OnigEncoding enc = reg->enc;
+
+  return enc;
+}
+
+/* Return the option flags stored in reg (reg->options). */
+extern OnigOptionType
+onig_get_options(regex_t* reg)
+{
+  OnigOptionType options = reg->options;
+
+  return options;
+}
+
+/* Return the ambiguity flag stored in reg (reg->ambig_flag). */
+extern OnigAmbigType
+onig_get_ambig_flag(regex_t* reg)
+{
+  OnigAmbigType flag = reg->ambig_flag;
+
+  return flag;
+}
+
+/* Return the syntax pointer stored in reg (reg->syntax). */
+extern OnigSyntaxType*
+onig_get_syntax(regex_t* reg)
+{
+  OnigSyntaxType* syntax = reg->syntax;
+
+  return syntax;
+}
+
+/* Return the value of reg->num_mem. */
+extern int
+onig_number_of_captures(regex_t* reg)
+{
+  int count = reg->num_mem;
+
+  return count;
+}
+
+/*
+ * Count how many group numbers in 0..ONIG_MAX_CAPTURE_HISTORY_GROUP have
+ * their bit set in reg->capture_history.
+ * Always 0 when USE_CAPTURE_HISTORY is not defined.
+ */
+extern int
+onig_number_of_capture_histories(regex_t* reg)
+{
+#ifdef USE_CAPTURE_HISTORY
+  int group = 0;
+  int enabled = 0;
+
+  while (group <= ONIG_MAX_CAPTURE_HISTORY_GROUP) {
+    if (BIT_STATUS_AT(reg->capture_history, group) != 0) {
+      enabled++;
+    }
+    group++;
+  }
+  return enabled;
+#else
+  return 0;
+#endif
+}
+
+/* Copy the object that `from` points to into the object that `to` points to
+ (plain assignment through both pointers). */
+extern void
+onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+{
+ *to = *from;
+}
+
diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c
new file mode 100644
index 0000000..f5ad1f3
--- /dev/null
+++ b/ext/mbstring/oniguruma/regext.c
@@ -0,0 +1,215 @@
+/**********************************************************************
+ regext.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+/*
+ * Widen each byte of [s, end) to a 4-byte big-endian unit: three zero bytes
+ * followed by the source byte.  conv must have room for 4 * (end - s) bytes.
+ */
+static void
+conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
+{
+  const UChar* src;
+
+  for (src = s; src < end; src++) {
+    conv[0] = '\0';
+    conv[1] = '\0';
+    conv[2] = '\0';
+    conv[3] = *src;
+    conv += 4;
+  }
+}
+
+/*
+ * Widen each byte of [s, end) to a 4-byte little-endian unit: the source
+ * byte followed by three zero bytes.  conv must have room for
+ * 4 * (end - s) bytes.
+ */
+static void
+conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
+{
+  const UChar* src;
+
+  for (src = s; src < end; src++) {
+    conv[0] = *src;
+    conv[1] = '\0';
+    conv[2] = '\0';
+    conv[3] = '\0';
+    conv += 4;
+  }
+}
+
+/*
+ * Widen each byte of [s, end) to a 2-byte big-endian unit: a zero byte
+ * followed by the source byte.  conv must have room for 2 * (end - s) bytes.
+ */
+static void
+conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
+{
+  const UChar* src;
+
+  for (src = s; src < end; src++) {
+    conv[0] = '\0';
+    conv[1] = *src;
+    conv += 2;
+  }
+}
+
+/*
+ * Widen each byte of [s, end) to a 2-byte little-endian unit: the source
+ * byte followed by a zero byte.  conv must have room for 2 * (end - s) bytes.
+ */
+static void
+conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
+{
+  const UChar* src;
+
+  for (src = s; src < end; src++) {
+    conv[0] = *src;
+    conv[1] = '\0';
+    conv += 2;
+  }
+}
+
+/*
+ * Copy [s, end) to conv with the byte order of every 4-byte unit reversed.
+ * conv must have room for (end - s) bytes.
+ *
+ * Only complete 4-byte units are swapped.  If the input length is not a
+ * multiple of 4, the 1-3 trailing bytes are copied unchanged; the previous
+ * loop condition (s < end) read s[1..3] beyond end and wrote beyond the
+ * (end - s)-byte output buffer in that case.
+ */
+static void
+conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
+{
+  while (end - s >= 4) {
+    *conv++ = s[3];
+    *conv++ = s[2];
+    *conv++ = s[1];
+    *conv++ = s[0];
+    s += 4;
+  }
+
+  /* incomplete trailing unit: keep the output fully initialized */
+  while (s < end) {
+    *conv++ = *s++;
+  }
+}
+
+/*
+ * Copy [s, end) to conv with the two bytes of every 2-byte unit exchanged.
+ * conv must have room for (end - s) bytes.
+ *
+ * Only complete 2-byte units are swapped.  If the input length is odd, the
+ * final byte is copied unchanged; the previous loop condition (s < end)
+ * read s[1] beyond end and wrote one byte beyond the (end - s)-byte output
+ * buffer in that case.
+ */
+static void
+conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
+{
+  while (end - s >= 2) {
+    *conv++ = s[1];
+    *conv++ = s[0];
+    s += 2;
+  }
+
+  /* odd trailing byte: keep the output fully initialized */
+  while (s < end) {
+    *conv++ = *s++;
+  }
+}
+
+/*
+ * Convert the byte string [s, end) from encoding `from` to encoding `to`
+ * into a newly xmalloc'ed buffer.
+ *
+ * Supported combinations:
+ *   ASCII / ISO-8859-1 -> UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
+ *                         (each byte is zero-extended to one code unit)
+ *   UTF16_LE <-> UTF16_BE, UTF32_LE <-> UTF32_BE   (byte swap)
+ *
+ * On success returns 0 and stores the buffer in *conv and its end in
+ * *conv_end; the caller releases it with xfree().
+ * Returns ONIGERR_MEMORY when the buffer cannot be allocated (or its size
+ * cannot be represented), and ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION for
+ * any other encoding pair.
+ *
+ * Changes relative to the previous version:
+ *  - a byte-swap request whose length is not a whole number of code units is
+ *    rejected (ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION) instead of being
+ *    passed to the swap helpers, which would access memory past both buffers;
+ *  - the output size is computed in size_t with an overflow check instead of
+ *    int multiplication;
+ *  - an empty input allocates 1 byte, so an implementation whose malloc(0)
+ *    returns NULL is not reported as ONIGERR_MEMORY.
+ */
+static int
+conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
+              UChar** conv, UChar** conv_end)
+{
+  void (*convert)(const UChar*, const UChar*, UChar*) = 0;
+  size_t unit = 0;            /* bytes per code unit of the target encoding */
+  size_t in_len, out_len;
+  int widen = (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1);
+
+  if (to == ONIG_ENCODING_UTF16_BE) {
+    unit = 2;
+    if (widen)                              convert = conv_ext0be;
+    else if (from == ONIG_ENCODING_UTF16_LE) convert = conv_swap2bytes;
+  }
+  else if (to == ONIG_ENCODING_UTF16_LE) {
+    unit = 2;
+    if (widen)                              convert = conv_ext0le;
+    else if (from == ONIG_ENCODING_UTF16_BE) convert = conv_swap2bytes;
+  }
+  else if (to == ONIG_ENCODING_UTF32_BE) {
+    unit = 4;
+    if (widen)                              convert = conv_ext0be32;
+    else if (from == ONIG_ENCODING_UTF32_LE) convert = conv_swap4bytes;
+  }
+  else if (to == ONIG_ENCODING_UTF32_LE) {
+    unit = 4;
+    if (widen)                              convert = conv_ext0le32;
+    else if (from == ONIG_ENCODING_UTF32_BE) convert = conv_swap4bytes;
+  }
+
+  if (convert == 0)
+    return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
+
+  in_len = (size_t )(end - s);
+  if (widen) {
+    /* one input byte becomes one `unit`-byte code unit */
+    out_len = in_len * unit;
+    if (out_len / unit != in_len) return ONIGERR_MEMORY;  /* size overflow */
+  }
+  else {
+    /* byte swap keeps the length; it needs whole code units */
+    if (in_len % unit != 0)
+      return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
+    out_len = in_len;
+  }
+
+  *conv = (UChar* )xmalloc(out_len != 0 ? out_len : 1);
+  CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+  *conv_end = *conv + out_len;
+  (*convert)(s, end, *conv);
+  return 0;
+}
+
+/*
+ * Create and compile a regex object from a pattern whose encoding
+ * (ci->pattern_enc) may differ from the encoding of the strings it will be
+ * matched against (ci->target_enc).
+ *
+ * When the two encodings differ, the pattern is first converted with
+ * conv_encoding(); the temporary converted copy is released before returning.
+ * einfo->par is reset to NULL when einfo is given.
+ *
+ * Returns 0 on success.  On a conversion or allocation error the error code
+ * is returned; on a compile error the object is freed with onig_free() and
+ * *reg is set to NULL.
+ */
+extern int
+onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+                OnigCompileInfo* ci, OnigErrorInfo* einfo)
+{
+  int status;
+  UChar *pat_begin = (UChar* )pattern;
+  UChar *pat_end = (UChar* )pattern_end;
+
+  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+
+  if (ci->pattern_enc != ci->target_enc) {
+    status = conv_encoding(ci->pattern_enc, ci->target_enc,
+                           pattern, pattern_end, &pat_begin, &pat_end);
+    if (status != 0) return status;
+  }
+
+  status = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
+                           ci->syntax);
+  if (status == 0) {
+    status = onig_compile(*reg, pat_begin, pat_end, einfo);
+    if (status != 0) {
+      onig_free(*reg);
+      *reg = NULL;
+    }
+  }
+
+  /* release the converted copy, if one was made */
+  if (pat_begin != pattern) xfree(pat_begin);
+
+  return status;
+}
+
+#ifdef USE_RECOMPILE_API
+/*
+ * Compile pattern into a new regex object with onig_new_deluxe() and install
+ * it in place of reg: via onig_transfer() when ONIG_STATE(reg) is
+ * ONIG_STATE_NORMAL, otherwise via onig_chain_link_add().
+ * Returns 0 on success or the error code of onig_new_deluxe().
+ */
+extern int
+onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
+{
+  regex_t *fresh;
+  int status = onig_new_deluxe(&fresh, pattern, pattern_end, ci, einfo);
+
+  if (status != 0) return status;
+
+  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
+    onig_chain_link_add(reg, fresh);
+  else
+    onig_transfer(reg, fresh);
+
+  return 0;
+}
+#endif
diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c
new file mode 100644
index 0000000..248957c
--- /dev/null
+++ b/ext/mbstring/oniguruma/reggnu.c
@@ -0,0 +1,175 @@
+/**********************************************************************
+ reggnu.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+#ifndef ONIGGNU_H
+#include "oniggnu.h"
+#endif
+
+/* Release the contents of region r through onig_region_free(). */
+extern void
+re_free_registers(OnigRegion* r)
+{
+  const int free_self = 0;  /* 0: don't free self */
+
+  onig_region_free(r, free_self);
+}
+
+/*
+ * Move startpos onto a character head of string for the encoding of reg.
+ * With range > 0 the position is adjusted to the right, otherwise to the
+ * left.  startpos is returned unchanged when it is <= 0, when it is >= size,
+ * or when ONIGENC_MBC_MAXLEN(reg->enc) is 1.
+ */
+extern int
+re_adjust_startpos(regex_t* reg, const char* string, int size,
+                   int startpos, int range)
+{
+  UChar *base, *pos, *head;
+
+  if (startpos <= 0 || ONIGENC_MBC_MAXLEN(reg->enc) == 1 || startpos >= size)
+    return startpos;
+
+  base = (UChar* )string;
+  pos  = base + startpos;
+
+  if (range > 0)
+    head = onigenc_get_right_adjust_char_head(reg->enc, base, pos);
+  else
+    head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, base, pos);
+
+  return head - base;
+}
+
+/*
+ * GNU-regex style wrapper: call onig_match() on the size bytes at str,
+ * matching at offset pos, with ONIG_OPTION_NONE.
+ */
+extern int
+re_match(regex_t* reg, const char* str, int size, int pos,
+         struct re_registers* regs)
+{
+  UChar* text_begin = (UChar* )str;
+  UChar* text_end = (UChar* )(str + size);
+  UChar* at = (UChar* )(str + pos);
+
+  return onig_match(reg, text_begin, text_end, at, regs, ONIG_OPTION_NONE);
+}
+
+/*
+ * GNU-regex style wrapper: call onig_search() on the size bytes at string,
+ * searching from offset startpos toward offset startpos + range, with
+ * ONIG_OPTION_NONE.
+ */
+extern int
+re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
+          struct re_registers* regs)
+{
+  UChar* text_begin = (UChar* )string;
+  UChar* text_end = (UChar* )(string + size);
+  UChar* search_start = (UChar* )(string + startpos);
+  UChar* search_range = (UChar* )(string + startpos + range);
+
+  return onig_search(bufp, text_begin, text_end, search_start, search_range,
+                     regs, ONIG_OPTION_NONE);
+}
+
+/*
+ * Compile the size bytes at pattern into reg with onig_compile().
+ * Returns the result of onig_compile(); when it is non-zero and ebuf is not
+ * NULL, the error message is written to ebuf.
+ */
+extern int
+re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
+{
+  OnigErrorInfo einfo;
+  int status;
+
+  status = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size),
+                        &einfo);
+  if (status != 0 && IS_NOT_NULL(ebuf)) {
+    (void )onig_error_code_to_str((UChar* )ebuf, status, &einfo);
+  }
+
+  return status;
+}
+
+#ifdef USE_RECOMPILE_API
+/*
+ * Recompile reg from the size bytes at pattern with onig_recompile(), using
+ * reg's current options, the default character encoding and the default
+ * syntax.  Returns the result of onig_recompile(); when it is non-zero and
+ * ebuf is not NULL, the error message is written to ebuf.
+ */
+extern int
+re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
+{
+  OnigErrorInfo einfo;
+  OnigEncoding default_enc;
+  int status;
+
+  /* I think encoding and options should be arguments of this function.
+     But this is adapted to present re.c. (2002/11/29)
+   */
+  default_enc = OnigEncDefaultCharEncoding;
+
+  status = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
+                          reg->options, default_enc, OnigDefaultSyntax,
+                          &einfo);
+  if (status != 0 && IS_NOT_NULL(ebuf)) {
+    (void )onig_error_code_to_str((UChar* )ebuf, status, &einfo);
+  }
+  return status;
+}
+#endif
+
+/* GNU-regex style wrapper: release reg by passing it to onig_free(). */
+extern void
+re_free_pattern(regex_t* reg)
+{
+ onig_free(reg);
+}
+
+/*
+ * Allocate and initialize a regex object through onig_alloc_init() with the
+ * default option, ambiguity flag, character encoding and syntax.
+ * Returns the result of onig_alloc_init().
+ */
+extern int
+re_alloc_pattern(regex_t** reg)
+{
+  int status;
+
+  status = onig_alloc_init(reg,
+                           ONIG_OPTION_DEFAULT,
+                           ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+                           OnigEncDefaultCharEncoding,
+                           OnigDefaultSyntax);
+  return status;
+}
+
+/* Pass table to onigenc_set_default_caseconv_table(). */
+extern void
+re_set_casetable(const char* table)
+{
+  UChar* conv_table = (UChar* )table;
+
+  onigenc_set_default_caseconv_table(conv_table);
+}
+
+/*
+ * Set the default encoding.
+ * With ONIG_RUBY_M17N the encoding is given directly; otherwise mb_code is
+ * one of the RE_MBCTYPE_* constants (ASCII, EUC, SJIS, UTF8) and any other
+ * value leaves the default encoding untouched.
+ */
+extern void
+#ifdef ONIG_RUBY_M17N
+re_mbcinit(OnigEncoding enc)
+#else
+re_mbcinit(int mb_code)
+#endif
+{
+#ifndef ONIG_RUBY_M17N
+  OnigEncoding enc;
+
+  if (mb_code == RE_MBCTYPE_ASCII)
+    enc = ONIG_ENCODING_ASCII;
+  else if (mb_code == RE_MBCTYPE_EUC)
+    enc = ONIG_ENCODING_EUC_JP;
+  else if (mb_code == RE_MBCTYPE_SJIS)
+    enc = ONIG_ENCODING_SJIS;
+  else if (mb_code == RE_MBCTYPE_UTF8)
+    enc = ONIG_ENCODING_UTF8;
+  else
+    return ;
+#endif
+
+  onigenc_set_default_encoding(enc);
+}
diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h
new file mode 100644
index 0000000..d6819d8
--- /dev/null
+++ b/ext/mbstring/oniguruma/regint.h
@@ -0,0 +1,830 @@
+#ifndef REGINT_H
+#define REGINT_H
+/**********************************************************************
+ regint.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* for debug */
+/* #define ONIG_DEBUG_PARSE_TREE */
+/* #define ONIG_DEBUG_COMPILE */
+/* #define ONIG_DEBUG_SEARCH */
+/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DONT_OPTIMIZE */
+
+/* for byte-code statistical data. */
+/* #define ONIG_DEBUG_STATISTICS */
+
+#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
+ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
+ defined(ONIG_DEBUG_STATISTICS)
+#ifndef ONIG_DEBUG
+#define ONIG_DEBUG
+#endif
+#endif
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ (defined(__ppc__) && defined(__APPLE__)) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(__mc68020__)
+#define PLATFORM_UNALIGNED_WORD_ACCESS
+#endif
+
+/* config */
+/* spec. config */
+/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
+#define USE_NAMED_GROUP
+#define USE_SUBEXP_CALL
+#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
+#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
+#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* #define USE_RECOMPILE_API */
+/* treat \r\n as line terminator.
+ !!! NOT SUPPORTED !!!
+ enable this configuration at your own risk */
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+
+/* internal config */
+#define USE_RECYCLE_NODE
+#define USE_OP_PUSH_OR_JUMP_EXACT
+#define USE_QUANTIFIER_PEEK_NEXT
+#define USE_ST_HASH_TABLE
+#define USE_SHARED_CCLASS_TABLE
+
+#define INIT_MATCH_STACK_SIZE 160
+#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+
+/* interface to external system */
+#ifdef NOT_RUBY /* given from Makefile */
+#include "config.h"
+#define USE_BACKREF_AT_LEVEL
+#define USE_CAPTURE_HISTORY
+#define USE_VARIABLE_META_CHARS
+#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
+#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
+/* #define USE_MULTI_THREAD_SYSTEM */
+#define THREAD_SYSTEM_INIT /* depend on thread system */
+#define THREAD_SYSTEM_END /* depend on thread system */
+#define THREAD_ATOMIC_START /* depend on thread system */
+#define THREAD_ATOMIC_END /* depend on thread system */
+#define THREAD_PASS /* depend on thread system */
+#define xmalloc malloc
+#define xrealloc realloc
+#define xcalloc calloc
+#define xfree free
+#else
+#include "ruby.h"
+#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+
+#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
+#define USE_MULTI_THREAD_SYSTEM
+#define THREAD_SYSTEM_INIT
+#define THREAD_SYSTEM_END
+#define THREAD_ATOMIC_START DEFER_INTS
+#define THREAD_ATOMIC_END ENABLE_INTS
+#define THREAD_PASS rb_thread_schedule()
+
+#define DEFAULT_WARN_FUNCTION onig_rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning
+
+#endif /* else NOT_RUBY */
+
+#define STATE_CHECK_STRING_THRESHOLD_LEN 7
+#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
+
+#define THREAD_PASS_LIMIT_COUNT 8
+#define xmemset memset
+#define xmemcpy memcpy
+#define xmemmove memmove
+#if defined(_WIN32) && !defined(__GNUC__)
+#define xalloca _alloca
+#if _MSC_VER < 1500
+#ifndef vsnprintf
+#define vsnprintf _vsnprintf
+#endif
+#endif
+#else
+#define xalloca alloca
+#endif
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
+
+#define ONIG_STATE_INC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state++;\
+ THREAD_ATOMIC_END;\
+} while(0)
+#define ONIG_STATE_DEC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state--;\
+ THREAD_ATOMIC_END;\
+} while(0)
+#else
+#define ONIG_STATE_INC(reg) /* Nothing */
+#define ONIG_STATE_DEC(reg) /* Nothing */
+#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
+#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+
+#define onig_st_is_member st_is_member
+
+#ifdef NOT_RUBY
+
+#define st_init_table onig_st_init_table
+#define st_init_table_with_size onig_st_init_table_with_size
+#define st_init_numtable onig_st_init_numtable
+#define st_init_numtable_with_size onig_st_init_numtable_with_size
+#define st_init_strtable onig_st_init_strtable
+#define st_init_strtable_with_size onig_st_init_strtable_with_size
+#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
+#define st_delete onig_st_delete
+#define st_delete_safe onig_st_delete_safe
+#define st_insert onig_st_insert
+#define st_insert_strend onig_st_insert_strend
+#define st_lookup onig_st_lookup
+#define st_lookup_strend onig_st_lookup_strend
+#define st_foreach onig_st_foreach
+#define st_add_direct onig_st_add_direct
+#define st_add_direct_strend onig_st_add_direct_strend
+#define st_free_table onig_st_free_table
+#define st_cleanup_safe onig_st_cleanup_safe
+#define st_copy onig_st_copy
+#define st_nothing_key_clone onig_st_nothing_key_clone
+#define st_nothing_key_free onig_st_nothing_key_free
+
+#else /* NOT_RUBY */
+
+#define onig_st_init_table st_init_table
+#define onig_st_init_table_with_size st_init_table_with_size
+#define onig_st_init_numtable st_init_numtable
+#define onig_st_init_numtable_with_size st_init_numtable_with_size
+#define onig_st_init_strtable st_init_strtable
+#define onig_st_init_strtable_with_size st_init_strtable_with_size
+#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
+#define onig_st_delete st_delete
+#define onig_st_delete_safe st_delete_safe
+#define onig_st_insert st_insert
+#define onig_st_insert_strend st_insert_strend
+#define onig_st_lookup st_lookup
+#define onig_st_lookup_strend st_lookup_strend
+#define onig_st_foreach st_foreach
+#define onig_st_add_direct st_add_direct
+#define onig_st_add_direct_strend st_add_direct_strend
+#define onig_st_free_table st_free_table
+#define onig_st_cleanup_safe st_cleanup_safe
+#define onig_st_copy st_copy
+#define onig_st_nothing_key_clone st_nothing_key_clone
+#define onig_st_nothing_key_free st_nothing_key_free
+
+#endif /* NOT_RUBY */
+
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
+#include <alloca.h>
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
+#ifndef __BORLANDC__
+#include <sys/types.h>
+#endif
+#endif
+
+#ifdef __BORLANDC__
+#include <malloc.h>
+#endif
+
+#ifdef ONIG_DEBUG
+# include <stdio.h>
+#endif
+
+#include "regenc.h"
+#include "oniguruma.h"
+
+#ifdef MIN
+#undef MIN
+#endif
+#ifdef MAX
+#undef MAX
+#endif
+#define MIN(a,b) (((a)>(b))?(b):(a))
+#define MAX(a,b) (((a)<(b))?(b):(a))
+
+#define IS_NULL(p) (((void*)(p)) == (void*)0)
+#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
+#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
+#define NULL_UCHARP ((UChar* )0)
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+/* sizeof(OnigCodePoint) */
+#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
+
+#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
+ (pad_size) = WORD_ALIGNMENT_SIZE \
+ - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
+ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
+} while (0)
+
+#define ALIGNMENT_RIGHT(addr) do {\
+ (addr) += (WORD_ALIGNMENT_SIZE - 1);\
+ (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
+} while (0)
+
+
+/* Bits per serialized byte, and the mask that isolates one byte. */
+#define B_SHIFT 8
+#define B_MASK 0xff
+
+/*
+ * SERIALIZE_<k>BYTE_INT(i,p): store the low k bytes of i at p[0..k-1],
+ * most significant byte first.  p is not advanced.
+ */
+#define SERIALIZE_2BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT) & B_MASK;\
+ *((p)+1) = (i) & B_MASK;\
+} while (0)
+
+#define SERIALIZE_4BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT*3) & B_MASK;\
+ *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
+ *((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\
+ *((p)+3) = (i) & B_MASK;\
+} while (0)
+
+/* NOTE(review): the shifts of up to 56 bits assume i is at least 64 bits wide. */
+#define SERIALIZE_8BYTE_INT(i,p) do {\
+ *(p) = ((i) >> B_SHIFT*7) & B_MASK;\
+ *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
+ *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
+ *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
+ *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
+ *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
+ *((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\
+ *((p)+7) = (i) & B_MASK;\
+} while (0)
+
+/*
+ * GET_<k>BYTE_INT_INC(type,i,p): rebuild a value from the k bytes at p
+ * (most significant byte first), assign it to i cast to `type`, then
+ * advance p by k.  Inverse of the SERIALIZE_<k>BYTE_INT macros.
+ */
+#define GET_2BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
+ (p) += 2;\
+} while (0)
+
+#define GET_4BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
+ ((unsigned int )((p)[1]) << B_SHIFT*2) | \
+ ((unsigned int )((p)[2]) << B_SHIFT ) | \
+ ((unsigned int )((p)[3]) )); \
+ (p) += 4;\
+} while (0)
+
+/* NOTE(review): arithmetic is done in unsigned long; this assumes that type
+ * is at least 64 bits wide on every platform that selects the 8-byte form. */
+#define GET_8BYTE_INT_INC(type,i,p) do {\
+ (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
+ ((unsigned long )((p)[1]) << B_SHIFT*6) | \
+ ((unsigned long )((p)[2]) << B_SHIFT*5) | \
+ ((unsigned long )((p)[3]) << B_SHIFT*4) | \
+ ((unsigned long )((p)[4]) << B_SHIFT*3) | \
+ ((unsigned long )((p)[5]) << B_SHIFT*2) | \
+ ((unsigned long )((p)[6]) << B_SHIFT ) | \
+ ((unsigned long )((p)[7]) )); \
+ (p) += 8;\
+} while (0)
+
+/*
+ * Pick the reader/writer pair that matches the configured width of `short`.
+ * No pair is defined when SIZEOF_SHORT is not 2, 4 or 8.
+ */
+#if SIZEOF_SHORT == 2
+#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#elif SIZEOF_SHORT == 4
+#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#elif SIZEOF_SHORT == 8
+#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p)
+#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#endif
+
+/* Same selection for `int` and `unsigned`, keyed on SIZEOF_INT. */
+#if SIZEOF_INT == 2
+#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p)
+#elif SIZEOF_INT == 4
+#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p)
+#elif SIZEOF_INT == 8
+#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p)
+#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p)
+#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
+#endif
+
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+/* stack pop level (three levels, numbered 0 to 2) */
+#define STACK_POP_LEVEL_FREE 0
+#define STACK_POP_LEVEL_MEM_START 1
+#define STACK_POP_LEVEL_ALL 2
+
+/* optimize flags: mutually exclusive small integers, not a bit mask */
+#define ONIG_OPTIMIZE_NONE 0
+#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
+#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
+#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
+#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
+#define ONIG_OPTIMIZE_MAP 5 /* char map */
+
+/*
+ * bit status
+ *
+ * A BitStatusType holds one flag per index n.  Indices at or beyond
+ * BIT_STATUS_BITS_NUM do not fit in the word: BIT_STATUS_AT and
+ * BIT_STATUS_ON_AT fold all of them onto bit 0, while
+ * BIT_STATUS_ON_AT_SIMPLE ignores them.
+ *
+ * The shifted constant is a BitStatusType so that setting the top bit does
+ * not shift into the sign bit of an int, and n is parenthesized everywhere so
+ * that an argument such as `a | b` selects the intended bit.
+ */
+typedef unsigned int BitStatusType;
+
+#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
+#define BIT_STATUS_CLEAR(stats) (stats) = 0
+#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
+#define BIT_STATUS_AT(stats,n) \
+  ((n) < BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))
+
+#define BIT_STATUS_ON_AT(stats,n) do {\
+  if ((n) < BIT_STATUS_BITS_NUM)\
+    (stats) |= ((BitStatusType )1 << (n));\
+  else\
+    (stats) |= 1;\
+} while (0)
+
+#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
+  if ((n) < BIT_STATUS_BITS_NUM)\
+    (stats) |= ((BitStatusType )1 << (n));\
+} while (0)
+
+
+/* Largest value of a signed int that is SIZEOF_INT bytes wide. */
+#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
+
+/*
+ * Digit value helpers.  No range check is done: the caller must already
+ * know that `code` is a digit of the relevant base.
+ */
+#define DIGITVAL(code) ((code) - '0')
+#define ODIGITVAL(code) DIGITVAL(code)
+/* Hex digit value: decimal digits first, then upper-case, otherwise treated
+ * as lower-case. */
+#define XDIGITVAL(enc,code) \
+ (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
+ : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
+
+/*
+ * Option predicates.  Each yields the masked option bits (non-zero when the
+ * option is set), not a normalized 0/1 value.
+ */
+#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
+#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
+#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
+#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
+#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
+#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
+/* True only when both SINGLELINE and MULTILINE are set. */
+#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
+/* Non-zero when either FIND_LONGEST or FIND_NOT_EMPTY is set. */
+#define IS_FIND_CONDITION(option) ((option) & \
+ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
+#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
+#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
+#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
+
+/* Disabled earlier definition, kept for reference:
+ OP_SET_OPTION is required for these options.
+#define IS_DYNAMIC_OPTION(option) \
+ (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
+*/
+/* ignore-case and multibyte status are included in compiled code,
+ so this predicate is always false. */
+#define IS_DYNAMIC_OPTION(option) 0
+
+/* Sentinel repeat count meaning "no upper bound". */
+#define REPEAT_INFINITE -1
+#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
+
+/*
+ * bitset
+ *
+ * A BitSet holds one bit for each of the SINGLE_BYTE_SIZE (256) single-byte
+ * values, stored in BITSET_SIZE "rooms" of type Bits.
+ */
+#define BITS_PER_BYTE 8
+#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
+#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
+#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+typedef unsigned int Bits;
+#else
+typedef unsigned char Bits;
+#endif
+typedef Bits BitSet[BITSET_SIZE];
+typedef Bits* BitSetRef;
+
+#define SIZE_BITSET sizeof(BitSet)
+
+/* Zero every room.  Declares a local `i`, so bs must not be an expression
+ * that refers to a caller variable named i. */
+#define BITSET_CLEAR(bs) do {\
+  int i;\
+  for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
+} while (0)
+
+/*
+ * Room holding bit `pos`, and the mask of that bit inside its room.
+ * pos is parenthesized so that an argument such as `a + b` is divided and
+ * reduced as a whole rather than term by term.
+ */
+#define BS_ROOM(bs,pos) (bs)[(pos) / BITS_IN_ROOM]
+#define BS_BIT(pos) (1 << ((pos) % BITS_IN_ROOM))
+
+/* Test (non-zero when set), set, clear and toggle a single bit. */
+#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
+#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
+#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
+#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
+
+/*
+ * bytes buffer
+ *
+ * Growable byte array.  `alloc` is the size of the storage at `p`; `used` is
+ * the high-water mark of bytes written.
+ *
+ * Every macro below that can grow the buffer executes
+ * `return(ONIGERR_MEMORY)` in the *enclosing function* when xrealloc fails,
+ * so they may only be used inside functions returning an int error code.
+ *
+ * NOTE(review): on xrealloc failure (buf)->p is overwritten with NULL, so
+ * the previous storage can no longer be released through the buffer.
+ * NOTE(review): BBUF_EXPAND and BBUF_ENSURE_SIZE grow by doubling and never
+ * terminate if (buf)->alloc is 0 while a non-zero size is requested.
+ */
+typedef struct _BBuf {
+  UChar* p;
+  unsigned int used;
+  unsigned int alloc;
+} BBuf;
+
+#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
+
+/* Grow the allocation by exactly `inc` bytes. */
+#define BBUF_SIZE_INC(buf,inc) do{\
+  (buf)->alloc += (inc);\
+  (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
+  if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+} while (0)
+
+/* Double the allocation at least once, and until it is >= low. */
+#define BBUF_EXPAND(buf,low) do{\
+  do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )(low));\
+  (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
+  if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+} while (0)
+
+/* Double the allocation until it is >= size; no-op when already large enough. */
+#define BBUF_ENSURE_SIZE(buf,size) do{\
+  unsigned int new_alloc = (buf)->alloc;\
+  while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
+  if ((buf)->alloc != new_alloc) {\
+    (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
+    if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
+    (buf)->alloc = new_alloc;\
+  }\
+} while (0)
+
+/* Copy n bytes to offset pos, growing the buffer and raising `used` as needed. */
+#define BBUF_WRITE(buf,pos,bytes,n) do{\
+  int used = (pos) + (n);\
+  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+  xmemcpy((buf)->p + (pos), (bytes), (n));\
+  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0)
+
+/* Single-byte form of BBUF_WRITE. */
+#define BBUF_WRITE1(buf,pos,byte) do{\
+  int used = (pos) + 1;\
+  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+  (buf)->p[(pos)] = (byte);\
+  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0)
+
+/* Append at the current end of the used region. */
+#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
+#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
+#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
+#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
+
+/* from < to */
+#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
+  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
+  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
+} while (0)
+
+/* from > to */
+#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
+  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+} while (0)
+
+/* from > to; moves everything from `from` to the end and shrinks `used` */
+#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
+  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
+  (buf)->used -= ((from) - (to));\
+} while (0)
+
+/* Insert n bytes at pos, shifting any bytes already at or after pos right. */
+#define BBUF_INSERT(buf,pos,bytes,n) do {\
+  if ((pos) >= (buf)->used) {\
+    BBUF_WRITE((buf),(pos),(bytes),(n));\
+  }\
+  else {\
+    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
+    xmemcpy((buf)->p + (pos), (bytes), (n));\
+  }\
+} while (0)
+
+#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
+
+
+/* Anchor flags.  Each is a distinct single bit, so they can be OR-ed together. */
+#define ANCHOR_BEGIN_BUF (1<<0)
+#define ANCHOR_BEGIN_LINE (1<<1)
+#define ANCHOR_BEGIN_POSITION (1<<2)
+#define ANCHOR_END_BUF (1<<3)
+#define ANCHOR_SEMI_END_BUF (1<<4)
+#define ANCHOR_END_LINE (1<<5)
+
+#define ANCHOR_WORD_BOUND (1<<6)
+#define ANCHOR_NOT_WORD_BOUND (1<<7)
+#define ANCHOR_WORD_BEGIN (1<<8)
+#define ANCHOR_WORD_END (1<<9)
+#define ANCHOR_PREC_READ (1<<10)
+#define ANCHOR_PREC_READ_NOT (1<<11)
+#define ANCHOR_LOOK_BEHIND (1<<12)
+#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
+
+#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
+#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+
+/*
+ * operation code
+ *
+ * Only the first three enumerators carry explicit values; every other value
+ * follows from its position, so inserting or reordering entries renumbers
+ * all opcodes after that point.
+ */
+enum OpCode {
+ OP_FINISH = 0, /* matching process terminator (no more alternative) */
+ OP_END = 1, /* pattern code terminator (success end) */
+
+ OP_EXACT1 = 2, /* single byte, N = 1 */
+ OP_EXACT2, /* single byte, N = 2 */
+ OP_EXACT3, /* single byte, N = 3 */
+ OP_EXACT4, /* single byte, N = 4 */
+ OP_EXACT5, /* single byte, N = 5 */
+ OP_EXACTN, /* single byte */
+ OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
+ OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
+ OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
+ OP_EXACTMB2N, /* mb-length = 2 */
+ OP_EXACTMB3N, /* mb-length = 3 */
+ OP_EXACTMBN, /* other length */
+
+ OP_EXACT1_IC, /* single byte, N = 1, ignore case */
+ OP_EXACTN_IC, /* single byte, ignore case */
+
+ OP_CCLASS,
+ OP_CCLASS_MB,
+ OP_CCLASS_MIX,
+ OP_CCLASS_NOT,
+ OP_CCLASS_MB_NOT,
+ OP_CCLASS_MIX_NOT,
+ OP_CCLASS_NODE, /* pointer to CClassNode node */
+
+ OP_ANYCHAR, /* "." */
+ OP_ANYCHAR_ML, /* "." multi-line */
+ OP_ANYCHAR_STAR, /* ".*" */
+ OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
+ OP_ANYCHAR_STAR_PEEK_NEXT,
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT,
+
+ OP_WORD,
+ OP_NOT_WORD,
+ OP_WORD_BOUND,
+ OP_NOT_WORD_BOUND,
+ OP_WORD_BEGIN,
+ OP_WORD_END,
+
+ OP_BEGIN_BUF,
+ OP_END_BUF,
+ OP_BEGIN_LINE,
+ OP_END_LINE,
+ OP_SEMI_END_BUF,
+ OP_BEGIN_POSITION,
+
+ OP_BACKREF1,
+ OP_BACKREF2,
+ OP_BACKREFN,
+ OP_BACKREFN_IC,
+ OP_BACKREF_MULTI,
+ OP_BACKREF_MULTI_IC,
+ OP_BACKREF_AT_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+
+ OP_MEMORY_START,
+ OP_MEMORY_START_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
+ OP_MEMORY_END,
+ OP_MEMORY_END_REC, /* push marker to stack */
+
+ OP_SET_OPTION_PUSH, /* set option and push recover option */
+ OP_SET_OPTION, /* set option */
+
+ OP_FAIL, /* pop stack and move */
+ OP_JUMP,
+ OP_PUSH,
+ OP_POP,
+ OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
+ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
+ OP_REPEAT, /* {n,m} */
+ OP_REPEAT_NG, /* {n,m}? (non greedy) */
+ OP_REPEAT_INC,
+ OP_REPEAT_INC_NG, /* non greedy */
+ OP_REPEAT_INC_SG, /* search and get in stack */
+ OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
+ OP_NULL_CHECK_START, /* null loop checker start */
+ OP_NULL_CHECK_END, /* null loop checker end */
+ OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
+
+ OP_PUSH_POS, /* (?=...) start */
+ OP_POP_POS, /* (?=...) end */
+ OP_PUSH_POS_NOT, /* (?!...) start */
+ OP_FAIL_POS, /* (?!...) end */
+ OP_PUSH_STOP_BT, /* (?>...) start */
+ OP_POP_STOP_BT, /* (?>...) end */
+ OP_LOOK_BEHIND, /* (?<=...) start (needs no end opcode) */
+ OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
+ OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
+
+ OP_CALL, /* \g<name> */
+ OP_RETURN,
+
+ OP_STATE_CHECK_PUSH, /* combination explosion check and push */
+ OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
+ OP_STATE_CHECK, /* check only */
+ OP_STATE_CHECK_ANYCHAR_STAR,
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+};
+
+/* Types of the operands read by the GET_*_INC macros in this header. */
+typedef int RelAddrType;
+typedef int AbsAddrType;
+typedef int LengthType;
+typedef int RepeatNumType;
+typedef short int MemNumType;
+typedef short int StateCheckNumType;
+typedef void* PointerType;
+
+/* Byte size of an opcode (always 1) and of each operand type above. */
+#define SIZE_OPCODE 1
+#define SIZE_RELADDR sizeof(RelAddrType)
+#define SIZE_ABSADDR sizeof(AbsAddrType)
+#define SIZE_LENGTH sizeof(LengthType)
+#define SIZE_MEMNUM sizeof(MemNumType)
+#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
+#define SIZE_REPEATNUM sizeof(RepeatNumType)
+#define SIZE_OPTION sizeof(OnigOptionType)
+#define SIZE_CODE_POINT sizeof(OnigCodePoint)
+#define SIZE_POINTER sizeof(PointerType)
+
+
+/*
+ * PLATFORM_GET_INC(val,p,type): load a `type` from the bytes at p into val,
+ * then advance p by sizeof(type).
+ */
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+
+/* Direct typed load through a cast pointer. */
+#define PLATFORM_GET_INC(val,p,type) do{\
+ val = *(type* )p;\
+ (p) += sizeof(type);\
+} while(0)
+
+#else
+
+/* Byte-wise copy, so p does not have to be aligned for `type`. */
+#define PLATFORM_GET_INC(val,p,type) do{\
+ xmemcpy(&val, (p), sizeof(type));\
+ (p) += sizeof(type);\
+} while(0)
+
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+/* Typed wrappers, one per operand type. */
+#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
+#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
+#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
+#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
+#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
+#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
+#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
+#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
+
+/* code point's address must be aligned address.
+ GET_CODE_POINT does a direct typed load and does not advance p. */
+#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
+/* Read one byte and advance p by one. */
+#define GET_BYTE_INC(byte,p) do{\
+ byte = *(p);\
+ (p)++;\
+} while(0)
+
+
+/*
+ * op-code + arg size
+ *
+ * Total byte length of each instruction: one opcode byte plus the sizes of
+ * its operands.  A literal "+ 1" stands for a single extra byte operand.
+ */
+#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
+#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
+#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP SIZE_OPCODE
+#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_POS SIZE_OPCODE
+#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP_POS SIZE_OPCODE
+#define SIZE_OP_FAIL_POS SIZE_OPCODE
+#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_FAIL SIZE_OPCODE
+#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
+#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
+#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
+#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
+#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
+#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
+#define SIZE_OP_RETURN SIZE_OPCODE
+
+/* Sizes of the state-check instructions; defined only when the
+ combination-explosion check is compiled in. */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#endif
+
+/* Accessors for the fields of the meta_char_table member of `enc`. */
+#define MC_ESC(enc) (enc)->meta_char_table.esc
+#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
+#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
+#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
+#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
+
+/* True when `code` equals the escape meta character of `enc` and the syntax
+ does not have ONIG_SYN_OP2_INEFFECTIVE_ESCAPE set. */
+#define IS_MC_ESC_CODE(code, enc, syn) \
+ ((code) == MC_ESC(enc) && \
+ !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
+/* Predefined ONIG_SYN_OP_* operator flag set with the name POSIX_COMMON. */
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+/* Predefined ONIG_SYN_OP_* operator flag set with the name GNU_REGEX. */
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+/* Companion ONIG_SYN_* flag set for GNU_REGEX (the flags here have no
+ _OP_ / _OP2_ infix). */
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
+/* cclass node: flag bits stored in CClassNode.flags */
+#define FLAG_CCLASS_NOT 1
+#define FLAG_CCLASS_SHARE (1<<1)
+
+/* Set, clear and test the two flag bits on a CClassNode pointer. */
+#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
+#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
+#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
+#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
+#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
+
+/* Character class: flag bits, a bitset over single-byte values, and an
+ optional buffer of multi-byte information. */
+typedef struct {
+ int flags;
+ BitSet bs;
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+
+
+/* Declarations available only in debug builds. */
+#ifdef ONIG_DEBUG
+
+/* One entry of the OnigOpInfo table: an opcode, its name and an argument
+ type code. */
+typedef struct {
+ short int opcode;
+ char* name;
+ short int arg_type;
+} OnigOpInfoType;
+
+extern OnigOpInfoType OnigOpInfo[];
+
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
+
+#ifdef ONIG_DEBUG_STATISTICS
+extern void onig_statistics_init P_((void));
+extern void onig_print_statistics P_((FILE* f));
+#endif
+#endif
+
+/* Internal functions declared here for use across the library's source
+ files; their definitions are not part of this header. */
+extern UChar* onig_error_code_to_format P_((int code));
+extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
+extern int onig_bbuf_init P_((BBuf* buf, int size));
+extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
+extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
+extern void onig_chain_reduce P_((regex_t* reg));
+extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
+extern void onig_transfer P_((regex_t* to, regex_t* from));
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+
+#endif /* REGINT_H */
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c
new file mode 100644
index 0000000..abf2cc1
--- /dev/null
+++ b/ext/mbstring/oniguruma/regparse.c
@@ -0,0 +1,5290 @@
+/**********************************************************************
+ regparse.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regparse.h"
+
+/* Buffer size constant for warning messages (its users are outside this
+ section of the file). */
+#define WARN_BUFSIZE 256
+
+/*
+ * Ruby syntax definition.  Four initializers, in order:
+ * 1. ONIG_SYN_OP_* flags: the GNU regex set plus extras, with
+ * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END removed;
+ * 2. ONIG_SYN_OP2_* flags;
+ * 3. ONIG_SYN_* flags: SYN_GNU_REGEX_BV plus extras;
+ * 4. an option value (ONIG_OPTION_NONE).
+ * NOTE(review): the field names of OnigSyntaxType are declared elsewhere;
+ * confirm the order against that declaration.
+ */
+OnigSyntaxType OnigSyntaxRuby = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
+ ONIG_SYN_OP2_ESC_H_XDIGIT )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
+ , ONIG_OPTION_NONE
+};
+
+/* Syntax used by default; initially the Ruby syntax. */
+OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
+
+/* Warning sink that discards the message; used as the default below when no
+ DEFAULT_WARN_FUNCTION / DEFAULT_VERB_WARN_FUNCTION is configured. */
+extern void onig_null_warn(const char* s) { }
+
+#ifdef RUBY_PLATFORM
+/* Forward a warning message to rb_warn().  The text is passed as a "%s"
+ argument, never as the format string itself. */
+extern void
+onig_rb_warn(const char* s)
+{
+ rb_warn("%s", s);
+}
+
+/* Same as onig_rb_warn(), but forwards to rb_warning(). */
+extern void
+onig_rb_warning(const char* s)
+{
+ rb_warning("%s", s);
+}
+#endif
+
+/* Current warning callback: the build-time default when one is configured,
+ otherwise the discarding sink. */
+#ifdef DEFAULT_WARN_FUNCTION
+static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_warn = onig_null_warn;
+#endif
+
+/* Current verbose-warning callback, selected the same way. */
+#ifdef DEFAULT_VERB_WARN_FUNCTION
+static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_verb_warn = onig_null_warn;
+#endif
+
+/* Replace the warning callback.  f is stored as given; no NULL check is made. */
+extern void onig_set_warn_func(OnigWarnFunc f)
+{
+ onig_warn = f;
+}
+
+/* Replace the verbose-warning callback.  f is stored as given; no NULL check
+ is made. */
+extern void onig_set_verb_warn_func(OnigWarnFunc f)
+{
+ onig_verb_warn = f;
+}
+
+/* Release a byte buffer: first its storage (when present), then the BBuf
+   header itself.  A NULL bbuf is ignored. */
+static void
+bbuf_free(BBuf* bbuf)
+{
+  if (IS_NULL(bbuf)) return;
+
+  if (IS_NOT_NULL(bbuf->p)) {
+    xfree(bbuf->p);
+  }
+  xfree(bbuf);
+}
+
+/*
+ * Create a copy of `from` and hand it back through *rto.
+ *
+ * The copy gets the same allocation size as `from` and the first from->used
+ * bytes of its contents.
+ *
+ * Returns 0 on success.  On failure returns ONIGERR_MEMORY (header
+ * allocation failed) or the non-zero result of BBUF_INIT; in both cases
+ * *rto is set to NULL and nothing is left allocated, so the caller has
+ * nothing to release.
+ * NOTE(review): this assumes BBUF_INIT leaves no storage behind when it
+ * reports failure -- confirm against onig_bbuf_init.
+ */
+static int
+bbuf_clone(BBuf** rto, BBuf* from)
+{
+  int r;
+  BBuf *to;
+
+  *rto = (BBuf* )NULL;
+
+  to = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
+
+  r = BBUF_INIT(to, from->alloc);
+  if (r != 0) {
+    /* do not leak the header or publish a half-built buffer */
+    xfree(to);
+    return r;
+  }
+
+  to->used = from->used;
+  xmemcpy(to->p, from->p, from->used);
+  *rto = to;
+  return 0;
+}
+
+/* Clear flag bits f in v when `negative` is true, otherwise set them.
+ NOTE: the expansion has no outer parentheses; use it as a full statement. */
+#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
+
+/* First code point of the multi-byte range: 0 when the encoding's minimum
+ character length exceeds one byte, otherwise 0x80. */
+#define MBCODE_START_POS(enc) \
+ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
+
+/* Add the range from MBCODE_START_POS(enc) to the largest OnigCodePoint to
+ the buffer behind pbuf. */
+#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
+ add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
+
+/* For encodings that are not single-byte, add the whole multi-byte range to
+ mbuf.  Uses a variable `r` of the enclosing function and returns from that
+ function when the add fails. */
+#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
+ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
+ if (r) return r;\
+ }\
+} while (0)
+
+
+/* Set `empty` to 1 when every room of bs is zero, otherwise to 0.  Declares
+ a local `i`, so neither argument may refer to a caller variable named i. */
+#define BITSET_IS_EMPTY(bs,empty) do {\
+ int i;\
+ empty = 1;\
+ for (i = 0; i < BITSET_SIZE; i++) {\
+ if ((bs)[i] != 0) {\
+ empty = 0; break;\
+ }\
+ }\
+} while (0)
+
+/* Set every bit in the inclusive range [from, to]; positions at or beyond
+   SINGLE_BYTE_SIZE are silently left out. */
+static void
+bitset_set_range(BitSetRef bs, int from, int to)
+{
+  int pos;
+  int last = (to < SINGLE_BYTE_SIZE) ? to : (SINGLE_BYTE_SIZE - 1);
+
+  pos = from;
+  while (pos <= last) {
+    BITSET_SET_BIT(bs, pos);
+    pos++;
+  }
+}
+
+/* Compiled out with `#if 0`: would set every bit of bs. */
+#if 0
+static void
+bitset_set_all(BitSetRef bs)
+{
+ int i;
+ for (i = 0; i < BITSET_SIZE; i++) {
+ bs[i] = ~((Bits )0);
+ }
+}
+#endif
+
+/* Complement every room of bs in place. */
+static void
+bitset_invert(BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < BITSET_SIZE) {
+    bs[room] = ~(bs[room]);
+    room++;
+  }
+}
+
+/* Store the complement of `from` into `to`; `from` itself is only read. */
+static void
+bitset_invert_to(BitSetRef from, BitSetRef to)
+{
+  int room = 0;
+
+  while (room < BITSET_SIZE) {
+    to[room] = ~(from[room]);
+    room++;
+  }
+}
+
+/* dest &= bs, room by room. */
+static void
+bitset_and(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < BITSET_SIZE) {
+    dest[room] &= bs[room];
+    room++;
+  }
+}
+
+/* dest |= bs, room by room. */
+static void
+bitset_or(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < BITSET_SIZE) {
+    dest[room] |= bs[room];
+    room++;
+  }
+}
+
+/* Copy every room of bs into dest. */
+static void
+bitset_copy(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < BITSET_SIZE) {
+    dest[room] = bs[room];
+    room++;
+  }
+}
+
+/*
+ * Compare the first n bytes of s1 and s2.
+ *
+ * Returns 0 when they are equal (or n <= 0).  Otherwise returns the
+ * difference at the first mismatch, computed as s2 minus s1 -- the opposite
+ * sign of the standard strncmp().
+ */
+extern int
+onig_strncmp(const UChar* s1, const UChar* s2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++) {
+    int diff = s2[i] - s1[i];
+    if (diff != 0) return diff;
+  }
+  return 0;
+}
+
+/*
+ * Copy the bytes [src, end) to dest and append a single zero byte.
+ * When the range is empty nothing is written at all, not even the
+ * terminator.
+ */
+static void
+k_strcpy(UChar* dest, const UChar* src, const UChar* end)
+{
+  int nbytes = end - src;
+
+  if (nbytes <= 0) return;
+
+  xmemcpy(dest, src, nbytes);
+  dest[nbytes] = (UChar )0;
+}
+
+/*
+ * Return a newly allocated copy of [s, end) followed by
+ * ONIGENC_MBC_MINLEN(enc) zero bytes, or NULL when allocation fails.
+ */
+static UChar*
+strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
+{
+  int body_len, nul_len;
+  UChar *copy, *tail;
+
+  body_len = end - s;
+  nul_len = ONIGENC_MBC_MINLEN(enc);
+
+  copy = (UChar* )xmalloc(body_len + nul_len);
+  if (IS_NULL(copy)) return NULL;
+  xmemcpy(copy, s, body_len);
+
+  /* zero-fill the terminator that follows the copied bytes */
+  tail = copy + body_len;
+  while (nul_len-- > 0) {
+    *tail++ = (UChar )0;
+  }
+
+  return copy;
+}
+
+
+/*
+ * scan pattern methods
+ *
+ * These macros rely on variables of the enclosing function: `p` (current
+ * position), `end` (end of the pattern), `enc` (encoding) and the
+ * `pfetch_prev` variable that PFETCH_READY declares.
+ */
+#define PEND_VALUE 0
+
+/* Declare the variable that remembers the position before the last fetch. */
+#define PFETCH_READY UChar* pfetch_prev
+/* 1 when p has reached end, otherwise 0. */
+#define PEND (p < end ? 0 : 1)
+/* Move p back to the position saved by the last PINC / PFETCH. */
+#define PUNFETCH p = pfetch_prev
+/* Save p, then skip one character. */
+#define PINC do { \
+ pfetch_prev = p; \
+ p += ONIGENC_MBC_ENC_LEN(enc, p); \
+} while (0)
+/* Decode the character at p into c, save p, then skip that character. */
+#define PFETCH(c) do { \
+ c = ONIGENC_MBC_TO_CODE(enc, p, end); \
+ pfetch_prev = p; \
+ p += ONIGENC_MBC_ENC_LEN(enc, p); \
+} while (0)
+
+/* Decode the character at p without moving; PEND_VALUE when at end. */
+#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
+#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
+
+/*
+ * Resize the heap string [dest, dest_end) to capa + 1 bytes and copy
+ * [src, src_end) plus a terminating zero byte after its existing contents.
+ * When dest is NULL a fresh block is allocated instead.
+ *
+ * Returns the (possibly moved) block, or NULL when allocation fails.
+ * As with k_strcpy(), nothing is written when the source range is empty.
+ */
+static UChar*
+k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
+              int capa)
+{
+  UChar* r;
+  /* Measure the existing length first: once dest has been handed to
+     xrealloc() its old value must not be used again, not even in pointer
+     arithmetic. */
+  int dest_len = dest_end - dest;
+
+  if (dest)
+    r = (UChar* )xrealloc(dest, capa + 1);
+  else
+    r = (UChar* )xmalloc(capa + 1);
+
+  CHECK_NULL_RETURN(r);
+  k_strcpy(r + dest_len, src, src_end);
+  return r;
+}
+
+/*
+ * dest on static area
+ *
+ * Allocate capa + 1 bytes and fill them with [dest, dest_end) followed by
+ * [src, src_end); dest itself is only read.  Returns the new block, or NULL
+ * when allocation fails.
+ */
+static UChar*
+strcat_capa_from_static(UChar* dest, UChar* dest_end,
+                        const UChar* src, const UChar* src_end, int capa)
+{
+  UChar* joined = (UChar* )xmalloc(capa + 1);
+
+  if (IS_NULL(joined)) return NULL;
+
+  k_strcpy(joined, dest, dest_end);
+  k_strcpy(joined + (dest_end - dest), src, src_end);
+  return joined;
+}
+
+#ifdef USE_NAMED_GROUP
+
+/* NOTE(review): presumably the initial capacity of NameEntry.back_refs; its
+ user is outside this section of the file. */
+#define INIT_NAME_BACKREFS_ALLOC_NUM 8
+
+/*
+ * One named group.  The code in this file reads back_ref1 when back_num is
+ * exactly 1 and the back_refs array when back_num is greater than 1.
+ */
+typedef struct {
+ UChar* name;
+ int name_len; /* byte length */
+ int back_num; /* number of backrefs */
+ int back_alloc;
+ int back_ref1;
+ int* back_refs;
+} NameEntry;
+
+#ifdef USE_ST_HASH_TABLE
+
+#include "st.h"
+
+/* Hash key describing the byte range [s, end); the bytes are referenced,
+ not copied. */
+typedef struct {
+ unsigned char* s;
+ unsigned char* end;
+} st_strend_key;
+
+static int strend_cmp(st_strend_key*, st_strend_key*);
+static int strend_hash(st_strend_key*);
+
+/* Comparison and hash callbacks for tables keyed by st_strend_key. */
+static struct st_hash_type type_strend_hash = {
+ strend_cmp,
+ strend_hash,
+};
+
+/* Create a hash table of the given initial size that uses the
+ st_strend_key comparison and hash callbacks. */
+static st_table*
+onig_st_init_strend_table_with_size(int size)
+{
+ return onig_st_init_table_with_size(&type_strend_hash, size);
+}
+
+/* Look up the key [str_key, end_key) and return the result of
+ onig_st_lookup().  The key is built on the stack, so nothing is allocated. */
+static int
+onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
+{
+ st_strend_key key;
+
+ key.s = (unsigned char* )str_key;
+ key.end = (unsigned char* )end_key;
+
+ return onig_st_lookup(table, (st_data_t )(&key), value);
+}
+
+/*
+ * Insert `value` under the key [str_key, end_key).
+ *
+ * A st_strend_key is allocated for the table to keep.  It only references
+ * the caller's bytes, so those must outlive the table entry.
+ *
+ * Returns ONIGERR_MEMORY when the key cannot be allocated; otherwise the
+ * result of onig_st_insert().  When that result is non-zero the freshly
+ * allocated key is released again here.
+ */
+static int
+onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
+{
+  st_strend_key* key;
+  int result;
+
+  key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
+  /* previously dereferenced without a check */
+  CHECK_NULL_RETURN_VAL(key, ONIGERR_MEMORY);
+
+  key->s = (unsigned char* )str_key;
+  key->end = (unsigned char* )end_key;
+  result = onig_st_insert(table, (st_data_t )key, value);
+  if (result) {
+    xfree(key);
+  }
+  return result;
+}
+
+/*
+ * Key comparison callback: 0 when both keys span the same bytes.
+ * Keys of different length yield 1; otherwise the result is the difference
+ * (x minus y) of the first mismatching byte.
+ */
+static int
+strend_cmp(st_strend_key* x, st_strend_key* y)
+{
+  unsigned char *xp, *yp;
+
+  if ((x->end - x->s) != (y->end - y->s))
+    return 1;
+
+  for (xp = x->s, yp = y->s; xp < x->end; xp++, yp++) {
+    int diff = (int )*xp - (int )*yp;
+    if (diff != 0) return diff;
+  }
+
+  return 0;
+}
+
+/*
+ * Hash callback: multiply-by-997 accumulation over the key bytes, finished
+ * with val + (val >> 5).
+ *
+ * The accumulation runs in unsigned arithmetic, where wrap-around is well
+ * defined; the earlier signed form overflowed `int` on longer keys.  The
+ * final step shifts the value as a signed int, as before, so the result
+ * keeps the same bit pattern on two's-complement platforms.
+ */
+static int
+strend_hash(st_strend_key* x)
+{
+  unsigned int val;
+  int sval;
+  unsigned char *p;
+
+  val = 0;
+  for (p = x->s; p < x->end; p++) {
+    val = val * 997u + (unsigned int )*p;
+  }
+
+  sval = (int )val;
+  return (int )(val + (unsigned int )(sval >> 5));
+}
+
+/* With USE_ST_HASH_TABLE the name table is an st hash table. */
+typedef st_table NameTable;
+typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
+
+/* NOTE(review): name buffer sizes; their users are outside this section of
+ the file.  NAMEBUF_SIZE_1 is NAMEBUF_SIZE + 1. */
+#define NAMEBUF_SIZE 24
+#define NAMEBUF_SIZE_1 25
+
+#ifdef ONIG_DEBUG
+/*
+ * onig_st_foreach() callback for onig_print_names(): write one line of the
+ * form "name: refs" to the FILE* passed in arg, where refs is "-" for no
+ * back reference, a single number, or a comma separated list.
+ */
+static int
+i_print_name_entry(UChar* key, NameEntry* e, void* arg)
+{
+  int idx;
+  FILE* out = (FILE* )arg;
+
+  fprintf(out, "%s: ", e->name);
+  switch (e->back_num) {
+  case 0:
+    fputs("-", out);
+    break;
+  case 1:
+    fprintf(out, "%d", e->back_ref1);
+    break;
+  default:
+    for (idx = 0; idx < e->back_num; idx++) {
+      if (idx > 0) fprintf(out, ", ");
+      fprintf(out, "%d", e->back_refs[idx]);
+    }
+    break;
+  }
+  fputs("\n", out);
+  return ST_CONTINUE;
+}
+
+/* Debug helper: dump the name table of reg to fp.  Prints nothing when the
+   regex has no name table.  Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NULL(t)) return 0;
+
+  fprintf(fp, "name table\n");
+  onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
+  fputs("\n", fp);
+  return 0;
+}
+#endif
+
+/*
+ * onig_st_foreach() callback used by names_clear(): free the entry's name,
+ * its back_refs array (when present), the key and the entry itself, then
+ * ask for the table slot to be removed.
+ * NOTE(review): `key` is declared UChar* but is presumably the
+ * st_strend_key allocated by onig_st_insert_strend() -- confirm.
+ */
+static int
+i_free_name_entry(UChar* key, NameEntry* e, void* arg)
+{
+ xfree(e->name);
+ if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
+ xfree(key);
+ xfree(e);
+ return ST_DELETE;
+}
+
+/* Free every entry of the regex's name table; the table object itself is
+   kept.  Always returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NULL(t)) return 0;
+
+  onig_st_foreach(t, i_free_name_entry, 0);
+  return 0;
+}
+
+/*
+ * Free all name entries and then the name table itself, leaving
+ * reg->name_table NULL.  Returns 0, or the non-zero status of names_clear()
+ * (in which case the table is left in place).
+ */
+extern int
+onig_names_free(regex_t* reg)
+{
+  NameTable* table;
+  int status = names_clear(reg);
+
+  if (status != 0) return status;
+
+  table = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(table)) {
+    onig_st_free_table(table);
+  }
+  reg->name_table = (void* )NULL;
+  return 0;
+}
+
+/* Return the entry registered under [name, name_end), or NULL when the regex
+   has no name table or the lookup leaves the result untouched. */
+static NameEntry*
+name_find(regex_t* reg, const UChar* name, const UChar* name_end)
+{
+  NameEntry* found = (NameEntry* )NULL;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NULL(t)) return found;
+
+  onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&found)));
+  return found;
+}
+
+/* State handed to i_names() while onig_foreach_name() walks the table. */
+typedef struct {
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
+ regex_t* reg; /* passed through to func */
+ void* arg; /* passed through to func */
+ int ret; /* first non-zero result of func, else 0 */
+ OnigEncoding enc;
+} INamesArg;
+
+/*
+ * onig_st_foreach() callback for onig_foreach_name(): call the user
+ * function with the entry's name range, its back reference count and a
+ * pointer to the references (the array when there is more than one,
+ * otherwise the address of back_ref1).  A non-zero result is recorded in
+ * arg->ret and stops the iteration.
+ */
+static int
+i_names(UChar* key, NameEntry* e, INamesArg* arg)
+{
+  int* refs;
+  int status;
+
+  refs = (e->back_num > 1) ? e->back_refs : &(e->back_ref1);
+  status = (*(arg->func))(e->name, e->name + e->name_len,
+                          e->back_num, refs, arg->reg, arg->arg);
+  if (status == 0) return ST_CONTINUE;
+
+  arg->ret = status;
+  return ST_STOP;
+}
+
+/*
+ * Call func once for every named group of reg.  Iteration stops at the
+ * first call that returns non-zero, and that value is returned; otherwise
+ * (including when there is no name table) the result is 0.
+ */
+extern int
+onig_foreach_name(regex_t* reg,
+                  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+                  void* arg)
+{
+  INamesArg narg;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  narg.ret = 0;
+  if (IS_NULL(t)) return narg.ret;
+
+  narg.func = func;
+  narg.reg = reg;
+  narg.arg = arg;
+  narg.enc = reg->enc; /* should be pattern encoding. */
+  onig_st_foreach(t, i_names, (HashDataType )&narg);
+
+  return narg.ret;
+}
+
+/*
+ * onig_st_foreach() callback for onig_renumber_name_table(): replace each
+ * group number stored in the entry with map[number].new_val.  Entries
+ * without back references are left alone.
+ */
+static int
+i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
+{
+  if (e->back_num == 1) {
+    e->back_ref1 = map[e->back_ref1].new_val;
+  }
+  else if (e->back_num > 1) {
+    int* ref = e->back_refs;
+    int* stop = ref + e->back_num;
+
+    for (; ref < stop; ref++) {
+      *ref = map[*ref].new_val;
+    }
+  }
+
+  return ST_CONTINUE;
+}
+
+/* Apply the group-number remapping in map to every entry of reg's name
+   table.  Does nothing when the regex has no name table.  Always returns 0. */
+extern int
+onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table))
+    return 0;
+
+  onig_st_foreach(table, i_renumber_name, (HashDataType )map);
+  return 0;
+}
+
+
+/* Number of entries in reg's name table; 0 when there is no table. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table))
+    return 0;
+
+  return table->num_entries;
+}
+
+#else /* USE_ST_HASH_TABLE */
+
+/* Number of NameEntry slots allocated when the array-based table is first
+   created by name_add(). */
+#define INIT_NAMES_ALLOC_NUM 8
+
+/* Array-based name table used when USE_ST_HASH_TABLE is not defined:
+     e     - array of entries
+     num   - number of entries in use
+     alloc - number of entries allocated */
+typedef struct {
+ NameEntry* e;
+ int num;
+ int alloc;
+} NameTable;
+
+
+#ifdef ONIG_DEBUG
+/* Debug helper: write the name table of reg to fp, one "name: numbers"
+   line per entry ("-" when the entry has no group number).  Prints nothing
+   for a missing or empty table.  Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+  int idx, k;
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table) || table->num <= 0)
+    return 0;
+
+  fprintf(fp, "name table\n");
+  for (idx = 0; idx < table->num; idx++) {
+    NameEntry* entry = &(table->e[idx]);
+
+    fprintf(fp, "%s: ", entry->name);
+    switch (entry->back_num) {
+    case 0:
+      fputs("-", fp);
+      break;
+
+    case 1:
+      fprintf(fp, "%d", entry->back_ref1);
+      break;
+
+    default:
+      for (k = 0; k < entry->back_num; k++) {
+        if (k > 0) fprintf(fp, ", ");
+        fprintf(fp, "%d", entry->back_refs[k]);
+      }
+      break;
+    }
+    fputs("\n", fp);
+  }
+  fputs("\n", fp);
+  return 0;
+}
+#endif
+
+/* Release the contents of the array-based name table: each entry's name
+   and group-number array, then the entry array itself.  The NameTable
+   struct stays allocated with num reset to 0.  Always returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+  int idx;
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table))
+    return 0;
+
+  for (idx = 0; idx < table->num; idx++) {
+    NameEntry* entry = &(table->e[idx]);
+
+    /* entries without a name are left untouched */
+    if (IS_NULL(entry->name))
+      continue;
+
+    xfree(entry->name);
+    entry->name       = NULL;
+    entry->name_len   = 0;
+    entry->back_num   = 0;
+    entry->back_alloc = 0;
+    if (IS_NOT_NULL(entry->back_refs))
+      xfree(entry->back_refs);
+    entry->back_refs  = (int* )NULL;
+  }
+
+  if (IS_NOT_NULL(table->e)) {
+    xfree(table->e);
+    table->e = NULL;
+  }
+  table->num = 0;
+  return 0;
+}
+
+/* Release the name table contents and then the NameTable struct, leaving
+   reg->name_table NULL.  Returns 0, or the non-zero status reported by
+   names_clear(). */
+extern int
+onig_names_free(regex_t* reg)
+{
+  NameTable* table;
+  int status = names_clear(reg);
+
+  if (status != 0)
+    return status;
+
+  table = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(table))
+    xfree(table);
+
+  reg->name_table = NULL;
+  return 0;
+}
+
+/* Linear search of the array-based name table for the name
+   [name, name_end).  Returns the matching entry, or NULL when there is no
+   table or no entry with the same length and bytes.
+   The name is only read, so the parameters are const-qualified; this
+   matches the hash-table variant and lets const callers such as
+   onig_name_to_group_numbers() pass their arguments without discarding
+   qualifiers. */
+static NameEntry*
+name_find(regex_t* reg, const UChar* name, const UChar* name_end)
+{
+  int i, len;
+  NameEntry* e;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NOT_NULL(t)) {
+    len = name_end - name;
+    for (i = 0; i < t->num; i++) {
+      e = &(t->e[i]);
+      /* compare lengths first so the byte comparison only runs on
+         candidates of equal size */
+      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
+        return e;
+    }
+  }
+  return (NameEntry* )NULL;
+}
+
+/* Call func once for every entry of the array-based name table, in table
+   order.  func receives (name, name_end, number of groups, group numbers,
+   reg, arg).  Stops at and returns the first non-zero result; returns 0
+   otherwise (including when there is no table). */
+extern int
+onig_foreach_name(regex_t* reg,
+  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+  void* arg)
+{
+  int idx;
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table))
+    return 0;
+
+  for (idx = 0; idx < table->num; idx++) {
+    NameEntry* entry = &(table->e[idx]);
+    int* refs;
+    int status;
+
+    /* a single group number lives inline, several live in the array */
+    if (entry->back_num > 1)
+      refs = entry->back_refs;
+    else
+      refs = &(entry->back_ref1);
+
+    status = (*func)(entry->name, entry->name + entry->name_len,
+                     entry->back_num, refs, reg, arg);
+    if (status != 0)
+      return status;
+  }
+  return 0;
+}
+
+/* Number of entries in use in reg's name table; 0 when there is no table. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table))
+    return 0;
+
+  return table->num;
+}
+
+#endif /* else USE_ST_HASH_TABLE */
+
+/* Register group number backref under the name [name, name_end).
+ *
+ * A new name entry (and, if needed, the name table) is created on first
+ * use; a name that already has a group number is rejected with
+ * ONIGERR_MULTIPLEX_DEFINED_NAME unless the syntax allows multiplex
+ * definitions, in which case the number is appended to the entry.
+ *
+ * Returns 0 on success, ONIGERR_EMPTY_GROUP_NAME for an empty name,
+ * ONIGERR_MULTIPLEX_DEFINED_NAME as described above, or ONIGERR_MEMORY.
+ *
+ * Allocation failures leave the table consistent: nothing is linked into
+ * the table, no counter is advanced and no old buffer is lost until the
+ * allocation it depends on has succeeded.
+ */
+static int
+name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
+{
+  int alloc;
+  int* refs;
+  NameEntry* e;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (name_end - name <= 0)
+    return ONIGERR_EMPTY_GROUP_NAME;
+
+  e = name_find(reg, name, name_end);
+  if (IS_NULL(e)) {
+#ifdef USE_ST_HASH_TABLE
+    if (IS_NULL(t)) {
+      t = onig_st_init_strend_table_with_size(5);
+      CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
+      reg->name_table = (void* )t;
+    }
+    e = (NameEntry* )xmalloc(sizeof(NameEntry));
+    CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
+
+    e->name = strdup_with_null(reg->enc, name, name_end);
+    if (IS_NULL(e->name)) {
+      xfree(e);  /* not yet in the table, so nobody else would free it */
+      return ONIGERR_MEMORY;
+    }
+    e->name_len   = name_end - name;
+    e->back_num   = 0;
+    e->back_alloc = 0;
+    e->back_refs  = (int* )NULL;
+
+    onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
+                          (HashDataType )e);
+
+#else
+    int i, init_from;
+    UChar* copy;
+
+    if (IS_NULL(t)) {
+      t = (NameTable* )xmalloc(sizeof(NameTable));
+      CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
+      t->e     = NULL;
+      t->alloc = 0;
+      t->num   = 0;
+      reg->name_table = t;
+    }
+
+    init_from = -1;  /* first slot that still needs initializing, if any */
+    if (IS_NULL(t->e)) {
+      /* brand-new table, or one whose entry array names_clear() released */
+      alloc = INIT_NAMES_ALLOC_NUM;
+      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
+      CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY);
+      t->alloc  = alloc;
+      t->num    = 0;
+      init_from = 0;
+    }
+    else if (t->num == t->alloc) {
+      NameEntry* grown;
+
+      alloc = t->alloc * 2;
+      /* keep t->e until the new block is known to exist */
+      grown = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
+      CHECK_NULL_RETURN_VAL(grown, ONIGERR_MEMORY);
+      t->e      = grown;
+      t->alloc  = alloc;
+      init_from = t->num;
+    }
+
+    if (init_from >= 0) {
+      for (i = init_from; i < t->alloc; i++) {
+        t->e[i].name       = NULL;
+        t->e[i].name_len   = 0;
+        t->e[i].back_num   = 0;
+        t->e[i].back_alloc = 0;
+        t->e[i].back_refs  = (int* )NULL;
+      }
+    }
+
+    /* duplicate the name before claiming a slot so a failure leaves
+       t->num untouched */
+    copy = strdup_with_null(reg->enc, name, name_end);
+    CHECK_NULL_RETURN_VAL(copy, ONIGERR_MEMORY);
+
+    e = &(t->e[t->num]);
+    e->name     = copy;
+    e->name_len = name_end - name;
+    t->num++;
+#endif
+  }
+
+  if (e->back_num >= 1 &&
+      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
+    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+                                   name, name_end);
+    return ONIGERR_MULTIPLEX_DEFINED_NAME;
+  }
+
+  /* back_num is advanced only after the storage for the new number exists */
+  if (e->back_num == 0) {
+    e->back_ref1 = backref;
+  }
+  else if (e->back_num == 1) {
+    /* second definition: move from the inline slot to an array */
+    alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
+    refs = (int* )xmalloc(sizeof(int) * alloc);
+    CHECK_NULL_RETURN_VAL(refs, ONIGERR_MEMORY);
+    refs[0] = e->back_ref1;
+    refs[1] = backref;
+    e->back_refs  = refs;
+    e->back_alloc = alloc;
+  }
+  else {
+    if (e->back_num + 1 > e->back_alloc) {
+      alloc = e->back_alloc * 2;
+      refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
+      CHECK_NULL_RETURN_VAL(refs, ONIGERR_MEMORY);
+      e->back_refs  = refs;
+      e->back_alloc = alloc;
+    }
+    e->back_refs[e->back_num] = backref;
+  }
+  e->back_num++;
+
+  return 0;
+}
+
+/* Look up the group numbers registered for the name [name, name_end).
+   Returns the number of groups and, when that number is non-zero, points
+   *nums at them (the inline slot for one group, the array for several).
+   Returns ONIGERR_UNDEFINED_NAME_REFERENCE when the name is unknown. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+                           const UChar* name_end, int** nums)
+{
+  NameEntry* entry = name_find(reg, name, name_end);
+
+  if (IS_NULL(entry))
+    return ONIGERR_UNDEFINED_NAME_REFERENCE;
+
+  if (entry->back_num == 1)
+    *nums = &(entry->back_ref1);
+  else if (entry->back_num != 0)
+    *nums = entry->back_refs;
+
+  return entry->back_num;
+}
+
+/* Resolve a group name to a single group number.
+   With one group for the name, that number is returned.  With several, the
+   groups are scanned from last to first and the first one whose
+   region->beg slot is not ONIG_REGION_NOTPOS is returned; if region is NULL
+   or no such group exists, the last number is returned.
+   Returns a negative lookup error unchanged, or ONIGERR_PARSER_BUG when the
+   name has no group number at all. */
+extern int
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+                            const UChar* name_end, OnigRegion *region)
+{
+  int* nums;
+  int idx;
+  int n = onig_name_to_group_numbers(reg, name, name_end, &nums);
+
+  if (n < 0)  return n;
+  if (n == 0) return ONIGERR_PARSER_BUG;
+  if (n == 1) return nums[0];
+
+  if (IS_NOT_NULL(region)) {
+    for (idx = n - 1; idx >= 0; idx--) {
+      if (region->beg[nums[idx]] != ONIG_REGION_NOTPOS)
+        return nums[idx];
+    }
+  }
+  return nums[n - 1];
+}
+
+#else /* USE_NAMED_GROUP */
+
+/* Variant compiled when USE_NAMED_GROUP is not defined: always reports
+   ONIG_NO_SUPPORT_CONFIG and does not touch *nums. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+ const UChar* name_end, int** nums)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Variant compiled when USE_NAMED_GROUP is not defined: always reports
+   ONIG_NO_SUPPORT_CONFIG. */
+extern int
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+ const UChar* name_end, OnigRegion* region)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Variant compiled when USE_NAMED_GROUP is not defined: func is never
+   called and ONIG_NO_SUPPORT_CONFIG is returned. */
+extern int
+onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Variant compiled when USE_NAMED_GROUP is not defined: there are never
+   any names, so the count is always 0. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ return 0;
+}
+#endif /* else USE_NAMED_GROUP */
+
+/* Report whether unnamed groups capture for this regex.
+   Returns 0 when ONIG_OPTION_DONT_CAPTURE_GROUP is on.  With named-group
+   support, also returns 0 when the regex has at least one name, the syntax
+   has ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP, and ONIG_OPTION_CAPTURE_GROUP is
+   off.  Returns 1 otherwise. */
+extern int
+onig_noname_group_capture_is_active(regex_t* reg)
+{
+  if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
+    return 0;
+
+#ifdef USE_NAMED_GROUP
+  if (onig_number_of_names(reg) > 0) {
+    if (IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP)) {
+      if (! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP))
+        return 0;
+    }
+  }
+#endif
+
+  return 1;
+}
+
+
+#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
+
+static void
+scan_env_clear(ScanEnv* env)
+{
+ int i;
+
+ BIT_STATUS_CLEAR(env->capture_history);
+ BIT_STATUS_CLEAR(env->bt_mem_start);
+ BIT_STATUS_CLEAR(env->bt_mem_end);
+ BIT_STATUS_CLEAR(env->backrefed_mem);
+ env->error = (UChar* )NULL;
+ env->error_end = (UChar* )NULL;
+ env->num_call = 0;
+ env->num_mem = 0;
+#ifdef USE_NAMED_GROUP
+ env->num_named = 0;
+#endif
+ env->mem_alloc = 0;
+ env->mem_nodes_dynamic = (Node** )NULL;
+
+ for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
+ env->mem_nodes_static[i] = NULL_NODE;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ env->num_comb_exp_check = 0;
+ env->comb_exp_max_regnum = 0;
+ env->curr_max_regnum = 0;
+ env->has_recursion = 0;
+#endif
+}
+
+/* Reserve the next group number in env and make sure a node slot exists
+   for it.  While the number fits in the static array nothing is allocated;
+   afterwards the dynamic array is created (seeded from the static one) or
+   doubled as needed, with the unused tail set to NULL_NODE.
+   Returns the new group number (env->num_mem after the increment), or
+   ONIGERR_MEMORY when the array cannot be allocated; env is unchanged in
+   that case. */
+static int
+scan_env_add_mem_entry(ScanEnv* env)
+{
+  int i, need, alloc;
+  Node** p;
+
+  need = env->num_mem + 1;
+  if (need >= SCANENV_MEMNODES_SIZE) {
+    if (env->mem_alloc <= need) {
+      if (IS_NULL(env->mem_nodes_dynamic)) {
+        alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
+        p = (Node** )xmalloc(sizeof(Node*) * alloc);
+        /* must be checked before the copy below writes through p */
+        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+        xmemcpy(p, env->mem_nodes_static,
+                sizeof(Node*) * SCANENV_MEMNODES_SIZE);
+      }
+      else {
+        alloc = env->mem_alloc * 2;
+        p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
+        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+      }
+
+      for (i = env->num_mem + 1; i < alloc; i++)
+        p[i] = NULL_NODE;
+
+      env->mem_nodes_dynamic = p;
+      env->mem_alloc = alloc;
+    }
+  }
+
+  env->num_mem++;
+  return env->num_mem;
+}
+
+/* Store node in the group-node slot num.  Returns 0 on success, or
+   ONIGERR_PARSER_BUG when num is larger than the number of groups
+   registered so far. */
+static int
+scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
+{
+  if (env->num_mem < num)
+    return ONIGERR_PARSER_BUG;
+
+  SCANENV_MEM_NODES(env)[num] = node;
+  return 0;
+}
+
+
+#ifdef USE_RECYCLE_NODE
+/* Link cell overlaid on a released Node (see the casts in onig_node_free()
+   and node_new()) so released nodes can be chained for reuse. */
+typedef struct _FreeNode {
+ struct _FreeNode* next;
+} FreeNode;
+
+/* Head of the list of released nodes available to node_new(). */
+static FreeNode* FreeNodeList = (FreeNode* )NULL;
+#endif
+
+/* Release node and everything it owns.
+   Child nodes are released recursively, except that the right-hand chain of
+   N_LIST / N_ALT nodes is walked with a loop (goto start) instead of
+   recursion.  A character class flagged as shared is left completely
+   untouched.  With USE_RECYCLE_NODE the node memory is pushed onto
+   FreeNodeList instead of being returned to the allocator.
+   A NULL node is accepted and ignored. */
+extern void
+onig_node_free(Node* node)
+{
+ start:
+ if (IS_NULL(node)) return ;
+
+ switch (NTYPE(node)) {
+ case N_STRING:
+ /* the string bytes are freed only when they do not live in the node's
+    own inline buffer */
+ if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+ xfree(NSTRING(node).s);
+ }
+ break;
+
+ case N_LIST:
+ case N_ALT:
+ onig_node_free(NCONS(node).left);
+ /* onig_node_free(NCONS(node).right); */
+ {
+ /* remember the successor before this cell is released */
+ Node* next_node = NCONS(node).right;
+
+#ifdef USE_RECYCLE_NODE
+ {
+ FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
+ n->next = FreeNodeList;
+ FreeNodeList = n;
+ THREAD_ATOMIC_END;
+ }
+#else
+ xfree(node);
+#endif
+
+ node = next_node;
+ goto start;
+ }
+ break;
+
+ case N_CCLASS:
+ {
+ CClassNode* cc = &(NCCLASS(node));
+
+ /* shared classes are not freed here: neither the buffer nor the node */
+ if (IS_CCLASS_SHARE(cc))
+ return ;
+
+ if (cc->mbuf)
+ bbuf_free(cc->mbuf);
+ }
+ break;
+
+ case N_QUANTIFIER:
+ if (NQUANTIFIER(node).target)
+ onig_node_free(NQUANTIFIER(node).target);
+ break;
+
+ case N_EFFECT:
+ if (NEFFECT(node).target)
+ onig_node_free(NEFFECT(node).target);
+ break;
+
+ case N_BACKREF:
+ if (IS_NOT_NULL(NBACKREF(node).back_dynamic))
+ xfree(NBACKREF(node).back_dynamic);
+ break;
+
+ case N_ANCHOR:
+ if (NANCHOR(node).target)
+ onig_node_free(NANCHOR(node).target);
+ break;
+ }
+
+ /* finally release (or recycle) the node itself */
+#ifdef USE_RECYCLE_NODE
+ {
+ FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
+ n->next = FreeNodeList;
+ FreeNodeList = n;
+ THREAD_ATOMIC_END;
+ }
+#else
+ xfree(node);
+#endif
+}
+
+#ifdef USE_RECYCLE_NODE
+/* Return every node held on FreeNodeList to the allocator, leaving the
+   list empty.  Always returns 0. */
+extern int
+onig_free_node_list(void)
+{
+  FreeNode* cur;
+  FreeNode* rest;
+
+  /* THREAD_ATOMIC_START; */
+  for (cur = FreeNodeList; IS_NOT_NULL(cur); cur = rest) {
+    rest = cur->next;
+    FreeNodeList = rest;
+    xfree(cur);
+  }
+  /* THREAD_ATOMIC_END; */
+  return 0;
+}
+#endif
+
+/* Obtain storage for one Node.  With USE_RECYCLE_NODE a node is taken from
+   FreeNodeList when one is available; otherwise the node comes from
+   xmalloc().  The contents are not initialized.  Returns NULL when
+   allocation fails. */
+static Node*
+node_new(void)
+{
+#ifdef USE_RECYCLE_NODE
+  Node* recycled = (Node* )NULL;
+
+  THREAD_ATOMIC_START;
+  if (IS_NOT_NULL(FreeNodeList)) {
+    recycled = (Node* )FreeNodeList;
+    FreeNodeList = FreeNodeList->next;
+  }
+  THREAD_ATOMIC_END;
+
+  if (IS_NOT_NULL(recycled))
+    return recycled;
+#endif
+
+  return (Node* )xmalloc(sizeof(Node));
+}
+
+
+/* Put a character class into its empty state: bit set cleared, no flags,
+   no multi-byte range buffer. */
+static void
+initialize_cclass(CClassNode* cc)
+{
+ BITSET_CLEAR(cc->bs);
+ cc->flags = 0;
+ cc->mbuf = NULL;
+}
+
+/* Create an empty N_CCLASS node (see initialize_cclass()).
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_cclass(void)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_CCLASS;
+
+ initialize_cclass(&(NCCLASS(node)));
+ return node;
+}
+
+/* Build an N_CCLASS node from code-point range tables.
+ *
+ *   not - non-zero marks the class as negated
+ *   sbr - ranges whose code points are set bit by bit in the class bit
+ *         set; may be NULL
+ *   mbr - range table used for the multi-byte part; may be NULL.  The
+ *         table is NOT copied: the node's BBuf points straight at mbr, so
+ *         mbr must outlive the node.
+ *
+ * Returns the new node, or NULL when either the node or its BBuf header
+ * cannot be allocated (the node is released in the latter case instead of
+ * being leaked).
+ */
+static Node*
+node_new_cclass_by_codepoint_range(int not,
+                   const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+{
+  CClassNode* cc;
+  int n, i, j;
+
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_CCLASS;
+
+  cc = &(NCCLASS(node));
+  cc->flags = 0;
+  cc->mbuf  = NULL;  /* keeps the node safe to free on the error path */
+  if (not != 0) CCLASS_SET_NOT(cc);
+
+  BITSET_CLEAR(cc->bs);
+  if (IS_NOT_NULL(sbr)) {
+    n = ONIGENC_CODE_RANGE_NUM(sbr);
+    for (i = 0; i < n; i++) {
+      for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
+           j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+        BITSET_SET_BIT(cc->bs, j);
+      }
+    }
+  }
+
+  if (IS_NOT_NULL(mbr)) {
+    n = ONIGENC_CODE_RANGE_NUM(mbr);
+    if (n != 0) {
+      BBuf* bbuf = (BBuf* )xmalloc(sizeof(BBuf));
+
+      if (IS_NULL(bbuf)) {
+        onig_node_free(node);
+        return NULL;
+      }
+      bbuf->alloc = n + 1;
+      bbuf->used  = n + 1;
+      bbuf->p     = (UChar* )((void* )mbr);
+
+      cc->mbuf = bbuf;
+    }
+  }
+
+  return node;
+}
+
+/* Create an N_CTYPE node carrying the given character-type code.
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_ctype(int type)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_CTYPE;
+ NCTYPE(node).type = type;
+ return node;
+}
+
+/* Create an N_ANYCHAR node.  Returns NULL when the node cannot be
+   allocated. */
+static Node*
+node_new_anychar(void)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_ANYCHAR;
+ return node;
+}
+
+/* Create an N_LIST cell with the given left and right children.
+   Returns NULL when the node cannot be allocated; left and right are not
+   released in that case. */
+static Node*
+node_new_list(Node* left, Node* right)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_LIST;
+ NCONS(node).left = left;
+ NCONS(node).right = right;
+ return node;
+}
+
+/* Externally visible wrapper around node_new_list(). */
+extern Node*
+onig_node_new_list(Node* left, Node* right)
+{
+ return node_new_list(left, right);
+}
+
+/* Create an N_ALT cell with the given left and right children.
+   Returns NULL when the node cannot be allocated; left and right are not
+   released in that case. */
+static Node*
+node_new_alt(Node* left, Node* right)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_ALT;
+ NCONS(node).left = left;
+ NCONS(node).right = right;
+ return node;
+}
+
+/* Create an N_ANCHOR node of the given anchor type with no target and
+   char_len set to -1.  Returns NULL when the node cannot be allocated. */
+extern Node*
+onig_node_new_anchor(int type)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_ANCHOR;
+ NANCHOR(node).type = type;
+ NANCHOR(node).target = NULL;
+ NANCHOR(node).char_len = -1;
+ return node;
+}
+
+/* Create an N_BACKREF node referring to back_num groups listed in
+   backrefs.
+     by_name     - non-zero sets NST_NAME_REF
+     exist_level - (USE_BACKREF_AT_LEVEL) non-zero sets NST_NEST_LEVEL and
+                   records nest_level
+     env         - scan environment used to look up the group nodes
+   The group numbers are copied into the node's static array when they fit
+   (back_num <= NODE_BACKREFS_SIZE), otherwise into a freshly allocated
+   array.  Returns NULL when the node or that array cannot be allocated. */
+static Node*
+node_new_backref(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+ int exist_level, int nest_level,
+#endif
+ ScanEnv* env)
+{
+ int i;
+ Node* node = node_new();
+
+ CHECK_NULL_RETURN(node);
+ node->type = N_BACKREF;
+ NBACKREF(node).state = 0;
+ NBACKREF(node).back_num = back_num;
+ NBACKREF(node).back_dynamic = (int* )NULL;
+ if (by_name != 0)
+ NBACKREF(node).state |= NST_NAME_REF;
+
+#ifdef USE_BACKREF_AT_LEVEL
+ if (exist_level != 0) {
+ NBACKREF(node).state |= NST_NEST_LEVEL;
+ NBACKREF(node).nest_level = nest_level;
+ }
+#endif
+
+ /* a referenced group that is already numbered but has no node stored yet
+    marks the reference as recursive */
+ for (i = 0; i < back_num; i++) {
+ if (backrefs[i] <= env->num_mem &&
+ IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
+ NBACKREF(node).state |= NST_RECURSION; /* /...(\1).../ */
+ break;
+ }
+ }
+
+ if (back_num <= NODE_BACKREFS_SIZE) {
+ for (i = 0; i < back_num; i++)
+ NBACKREF(node).back_static[i] = backrefs[i];
+ }
+ else {
+ int* p = (int* )xmalloc(sizeof(int) * back_num);
+ if (IS_NULL(p)) {
+ onig_node_free(node);
+ return NULL;
+ }
+ NBACKREF(node).back_dynamic = p;
+ for (i = 0; i < back_num; i++)
+ p[i] = backrefs[i];
+ }
+ return node;
+}
+
+#ifdef USE_SUBEXP_CALL
+/* Create an N_CALL node for the group named [name, name_end).  The name
+   pointers are stored as given (no copy); the call starts unresolved:
+   ref_num is CALLNODE_REFNUM_UNDEF and target is NULL_NODE.
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_call(UChar* name, UChar* name_end)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ node->type = N_CALL;
+ NCALL(node).state = 0;
+ NCALL(node).ref_num = CALLNODE_REFNUM_UNDEF;
+ NCALL(node).target = NULL_NODE;
+ NCALL(node).name = name;
+ NCALL(node).name_end = name_end;
+ return node;
+}
+#endif
+
+/* Create an N_QUANTIFIER node repeating between lower and upper times.
+   The node starts greedy, without a target, and with its analysis fields
+   reset; by_number != 0 additionally sets NST_BY_NUMBER.
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_quantifier(int lower, int upper, int by_number)
+{
+  QuantifierNode* qn;
+  Node* node = node_new();
+
+  CHECK_NULL_RETURN(node);
+  node->type = N_QUANTIFIER;
+
+  qn = &(NQUANTIFIER(node));
+  qn->state  = (by_number != 0) ? NST_BY_NUMBER : 0;
+  qn->target = NULL;
+  qn->lower  = lower;
+  qn->upper  = upper;
+  qn->greedy = 1;
+  qn->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+  qn->head_exact        = NULL_NODE;
+  qn->next_head_exact   = NULL_NODE;
+  qn->is_refered        = 0;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  qn->comb_exp_check_num = 0;
+#endif
+
+  return node;
+}
+
+/* Create an N_EFFECT node of the given effect type with every other field
+   reset (no target, call_addr -1).  Returns NULL when the node cannot be
+   allocated. */
+static Node*
+node_new_effect(int type)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_EFFECT;
+ NEFFECT(node).type = type;
+ NEFFECT(node).state = 0;
+ NEFFECT(node).regnum = 0;
+ NEFFECT(node).option = 0;
+ NEFFECT(node).target = NULL;
+ NEFFECT(node).call_addr = -1;
+ NEFFECT(node).opt_count = 0;
+ return node;
+}
+
+/* Externally visible wrapper around node_new_effect(). */
+extern Node*
+onig_node_new_effect(int type)
+{
+ return node_new_effect(type);
+}
+
+/* Create an EFFECT_MEMORY node.  is_named != 0 sets NST_NAMED_GROUP.
+   option is stored in the node only when USE_SUBEXP_CALL is defined.
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_effect_memory(OnigOptionType option, int is_named)
+{
+ Node* node = node_new_effect(EFFECT_MEMORY);
+ CHECK_NULL_RETURN(node);
+ if (is_named != 0)
+ SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
+
+#ifdef USE_SUBEXP_CALL
+ NEFFECT(node).option = option;
+#endif
+ return node;
+}
+
+/* Create an EFFECT_OPTION node carrying the given option value.
+   Returns NULL when the node cannot be allocated. */
+static Node*
+node_new_option(OnigOptionType option)
+{
+ Node* node = node_new_effect(EFFECT_OPTION);
+ CHECK_NULL_RETURN(node);
+ NEFFECT(node).option = option;
+ return node;
+}
+
+/* Append the bytes [s, end) to the string held by node.
+   While the string has no separate capacity (capa == 0) and the result
+   still fits within NODE_STR_BUF_SIZE - 1 bytes, the bytes are copied in
+   place; otherwise the string is kept in a buffer with recorded capacity,
+   which is (re)built with NODE_STR_MARGIN bytes of headroom when the
+   current capacity is too small.
+   Returns 0 on success (also when there is nothing to append), or
+   ONIGERR_MEMORY when the buffer cannot be grown. */
+extern int
+onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
+{
+ int addlen = end - s;
+
+ if (addlen > 0) {
+ int len = NSTRING(node).end - NSTRING(node).s;
+
+ if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+ UChar* p;
+ int capa = len + addlen + NODE_STR_MARGIN;
+
+ if (capa <= NSTRING(node).capa) {
+ /* enough room already: append in place */
+ k_strcpy(NSTRING(node).s + len, s, end);
+ }
+ else {
+ /* moving out of the node's inline buffer and growing an existing
+    separate buffer go through different helpers */
+ if (NSTRING(node).s == NSTRING(node).buf)
+ p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
+ s, end, capa);
+ else
+ p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+
+ CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+ NSTRING(node).s = p;
+ NSTRING(node).capa = capa;
+ }
+ }
+ else {
+ k_strcpy(NSTRING(node).s + len, s, end);
+ }
+ NSTRING(node).end = NSTRING(node).s + len + addlen;
+ }
+
+ return 0;
+}
+
+/* Append the single byte c to node's string.  Returns the result of
+   onig_node_str_cat(). */
+static int
+node_str_cat_char(Node* node, UChar c)
+{
+  UChar one = c;
+
+  return onig_node_str_cat(node, &one, &one + 1);
+}
+
+/* Turn node into an empty N_STRING node with the given flag, using the
+   node's inline buffer as storage.  Whatever the node held before is
+   overwritten, not released. */
+extern void
+onig_node_conv_to_str_node(Node* node, int flag)
+{
+  StrNode* sn;
+
+  node->type = N_STRING;
+
+  sn = &(NSTRING(node));
+  sn->flag = flag;
+  sn->capa = 0;
+  sn->s    = sn->buf;
+  sn->end  = sn->buf;
+}
+
+/* Reset node's string to empty: a separately allocated buffer (capa != 0
+   and not the inline buffer) is freed, then capacity and flag are cleared
+   and the string points back at the inline buffer. */
+extern void
+onig_node_str_clear(Node* node)
+{
+  StrNode* sn = &(NSTRING(node));
+
+  if (sn->capa != 0) {
+    if (IS_NOT_NULL(sn->s) && sn->s != sn->buf)
+      xfree(sn->s);
+  }
+
+  sn->capa = 0;
+  sn->flag = 0;
+  sn->s    = sn->buf;
+  sn->end  = sn->buf;
+}
+
+/* Create an N_STRING node holding a copy of the bytes [s, end).
+   Returns NULL when the node cannot be allocated or the bytes cannot be
+   appended (the node is released in that case). */
+static Node*
+node_new_str(const UChar* s, const UChar* end)
+{
+  StrNode* sn;
+  Node* node = node_new();
+
+  CHECK_NULL_RETURN(node);
+  node->type = N_STRING;
+
+  /* start as an empty string living in the inline buffer */
+  sn = &(NSTRING(node));
+  sn->capa = 0;
+  sn->flag = 0;
+  sn->s    = sn->buf;
+  sn->end  = sn->buf;
+
+  if (onig_node_str_cat(node, s, end) == 0)
+    return node;
+
+  onig_node_free(node);
+  return NULL;
+}
+
+/* Externally visible wrapper around node_new_str(). */
+extern Node*
+onig_node_new_str(const UChar* s, const UChar* end)
+{
+ return node_new_str(s, end);
+}
+
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+/* Create an N_STRING node for the bytes [s, end) and mark it raw.
+   Returns NULL when node_new_str() fails; the raw flag is only applied to
+   a node that actually exists. */
+static Node*
+node_new_str_raw(UChar* s, UChar* end)
+{
+  Node* node = node_new_str(s, end);
+
+  if (IS_NOT_NULL(node)) {
+    NSTRING_SET_RAW(node);
+  }
+  return node;
+}
+#endif
+
+/* Create an N_STRING node with no content (node_new_str() is given an
+   empty NULL..NULL span). */
+static Node*
+node_new_empty(void)
+{
+ return node_new_str(NULL, NULL);
+}
+
+/* Create an N_STRING node holding the single byte c.  Returns the result
+   of node_new_str(). */
+static Node*
+node_new_str_char(UChar c)
+{
+  UChar one = c;
+
+  return node_new_str(&one, &one + 1);
+}
+
+/* Split the last character off the string sn.
+   When sn holds more than one character, a new string node containing the
+   last character is returned (carrying the raw flag if sn has it) and sn
+   is shortened to end just before that character.
+   Returns NULL_NODE when sn is empty, holds a single character, or the new
+   node cannot be allocated; sn is left unchanged in all of those cases. */
+static Node*
+str_node_split_last_char(StrNode* sn, OnigEncoding enc)
+{
+  const UChar *p;
+  Node* n = NULL_NODE;
+
+  if (sn->end > sn->s) {
+    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
+    if (p && p > sn->s) { /* can be split. */
+      n = node_new_str(p, sn->end);
+      /* without the tail node the string must stay whole */
+      if (IS_NULL(n))
+        return NULL_NODE;
+
+      if ((sn->flag & NSTR_RAW) != 0) {
+        NSTRING_SET_RAW(n);
+      }
+      sn->end = (UChar* )p;
+    }
+  }
+  return n;
+}
+
+/* Returns 1 when the string sn is longer than its first character (as
+   measured by enc_len()), 0 otherwise, including for an empty string. */
+static int
+str_node_can_be_split(StrNode* sn, OnigEncoding enc)
+{
+  if (sn->end <= sn->s)
+    return 0;
+
+  if (enc_len(enc, sn->s) < sn->end - sn->s)
+    return 1;
+
+  return 0;
+}
+
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+/* Insert num copies of the byte val in front of the string sn, shifting
+   the existing bytes right by num.  Always returns 0 (the function is
+   declared int, so it must return a value on every path).
+   NOTE(review): the existing bytes are staged in a NODE_STR_BUF_SIZE-byte
+   local buffer and the shifted string is written back into sn->s without a
+   capacity check; callers presumably only use this on short strings --
+   confirm. */
+static int
+node_str_head_pad(StrNode* sn, int num, UChar val)
+{
+  UChar buf[NODE_STR_BUF_SIZE];
+  int i, len;
+
+  len = sn->end - sn->s;
+  onig_strcpy(buf, sn->s, sn->end);
+  onig_strcpy(&(sn->s[num]), buf, buf + len);
+  sn->end += num;
+
+  for (i = 0; i < num; i++) {
+    sn->s[i] = val;
+  }
+  return 0;
+}
+#endif
+
+/* Scan a run of decimal digits starting at *src (not reading past end).
+   On success *src is advanced past the digits and the value is returned
+   (0 when there are no digits).  Returns -1, leaving *src untouched, when
+   the value would exceed INT_MAX_LIMIT. */
+extern int
+onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
+{
+  unsigned int num, val;
+  OnigCodePoint c;
+  UChar* p = *src;
+  PFETCH_READY;
+
+  num = 0;
+  while (!PEND) {
+    PFETCH(c);
+    if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      PUNFETCH;  /* first non-digit ends the number and is not consumed */
+      break;
+    }
+
+    val = (unsigned int )DIGITVAL(c);
+    if ((INT_MAX_LIMIT - val) / 10UL < num)
+      return -1;  /* overflow */
+
+    num = num * 10 + val;
+  }
+  *src = p;
+  return num;
+}
+
+/* Scan up to maxlen hexadecimal digits starting at *src (not reading past
+   end).  On success *src is advanced past the digits and the value is
+   returned (0 when there are no digits).  Returns -1, leaving *src
+   untouched, when the value would exceed INT_MAX_LIMIT. */
+static int
+scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
+                                 OnigEncoding enc)
+{
+  OnigCodePoint c;
+  unsigned int num, val;
+  UChar* p = *src;
+  PFETCH_READY;
+
+  num = 0;
+  while (!PEND && maxlen-- != 0) {
+    PFETCH(c);
+    if (! ONIGENC_IS_CODE_XDIGIT(enc, c)) {
+      PUNFETCH;  /* first non-digit ends the number and is not consumed */
+      break;
+    }
+
+    val = (unsigned int )XDIGITVAL(enc,c);
+    if ((INT_MAX_LIMIT - val) / 16UL < num)
+      return -1;  /* overflow */
+
+    num = (num << 4) + val;
+  }
+  *src = p;
+  return num;
+}
+
+/* Scan up to maxlen octal digits ('0'..'7') starting at *src (not reading
+   past end).  On success *src is advanced past the digits and the value is
+   returned (0 when there are no digits).  Returns -1, leaving *src
+   untouched, when the value would exceed INT_MAX_LIMIT. */
+static int
+scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
+                           OnigEncoding enc)
+{
+  OnigCodePoint c;
+  unsigned int num, val;
+  UChar* p = *src;
+  PFETCH_READY;
+
+  num = 0;
+  while (!PEND && maxlen-- != 0) {
+    PFETCH(c);
+    if (! (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')) {
+      PUNFETCH;  /* first non-octal character is not consumed */
+      break;
+    }
+
+    val = ODIGITVAL(c);
+    if ((INT_MAX_LIMIT - val) / 8UL < num)
+      return -1;  /* overflow */
+
+    num = (num << 3) + val;
+  }
+  *src = p;
+  return num;
+}
+
+
+/* Write one code point (SIZE_CODE_POINT bytes taken from the variable
+   code) into bbuf at byte offset pos.  code must be an lvalue because its
+   address is taken. */
+#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
+ BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
+
+/* data format:
+ [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
+ (all data size is OnigCodePoint)
+ */
+/* Allocate a new, empty code-range buffer and store it in *pbuf.
+   The buffer starts with room for INIT_MULTI_BYTE_RANGE_SIZE bytes and a
+   range count of 0.
+   Returns 0 on success.  On failure returns ONIGERR_MEMORY (or the error
+   from BBUF_INIT) and leaves *pbuf NULL, so the caller never sees a
+   half-initialized buffer and the BBuf header is not leaked. */
+static int
+new_code_range(BBuf** pbuf)
+{
+#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
+  int r;
+  OnigCodePoint n;
+  BBuf* bbuf;
+
+  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
+  r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+  if (r) {
+    /* NOTE(review): assumes BBUF_INIT attaches no data buffer when it
+       fails, so only the header needs releasing -- confirm. */
+    xfree(bbuf);
+    *pbuf = (BBuf* )NULL;
+    return r;
+  }
+
+  n = 0;
+  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+  return 0;
+}
+
+/* Insert the range [from, to] into the sorted range list in *pbuf,
+   merging it with every existing range it overlaps or touches.
+   from and to are swapped when given in the wrong order; *pbuf is created
+   when it is NULL.
+   Returns 0 on success, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the list
+   would exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or an allocation error. */
+static int
+add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
+{
+ int r, inc_n, pos;
+ int low, high, bound, x;
+ OnigCodePoint n, *data;
+ BBuf* bbuf;
+
+ if (from > to) {
+ n = from; from = to; to = n;
+ }
+
+ if (IS_NULL(*pbuf)) {
+ r = new_code_range(pbuf);
+ if (r) return r;
+ bbuf = *pbuf;
+ n = 0;
+ }
+ else {
+ bbuf = *pbuf;
+ GET_CODE_POINT(n, bbuf->p);
+ }
+ /* data[] addresses the (from, to) pairs that follow the leading count */
+ data = (OnigCodePoint* )(bbuf->p);
+ data++;
+
+ /* binary search: low = first range whose end is not below from */
+ for (low = 0, bound = n; low < bound; ) {
+ x = (low + bound) >> 1;
+ if (from > data[x*2 + 1])
+ low = x + 1;
+ else
+ bound = x;
+ }
+
+ /* binary search: high = first range starting more than one past to */
+ for (high = low, bound = n; high < bound; ) {
+ x = (high + bound) >> 1;
+ if (to >= data[x*2] - 1)
+ high = x + 1;
+ else
+ bound = x;
+ }
+
+ /* ranges low..high-1 are absorbed into the new one; inc_n is the net
+    change in the number of ranges (1 = pure insertion) */
+ inc_n = low + 1 - high;
+ if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
+ return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
+
+ if (inc_n != 1) {
+ /* widen the new range to cover the absorbed ones */
+ if (from > data[low*2])
+ from = data[low*2];
+ if (to < data[(high - 1)*2 + 1])
+ to = data[(high - 1)*2 + 1];
+ }
+
+ if (inc_n != 0 && (OnigCodePoint )high < n) {
+ /* shift the ranges after the merged span to their new position */
+ int from_pos = SIZE_CODE_POINT * (1 + high * 2);
+ int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
+ int size = (n - high) * 2 * SIZE_CODE_POINT;
+
+ if (inc_n > 0) {
+ BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
+ }
+ else {
+ BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
+ }
+ }
+
+ pos = SIZE_CODE_POINT * (1 + low * 2);
+ BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+ BBUF_WRITE_CODE_POINT(bbuf, pos, from);
+ BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
+ n += inc_n;
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+
+ return 0;
+}
+
+/* Add the range [from, to] to *pbuf.  Unlike add_code_range_to_buf(), a
+   reversed range (from > to) is not swapped: it is silently ignored when
+   the syntax has ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC and rejected with
+   ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS otherwise. */
+static int
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
+{
+  if (from <= to)
+    return add_code_range_to_buf(pbuf, from, to);
+
+  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+    return 0;
+
+  return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+}
+
+/* Build in *pbuf the complement of the range list bbuf, taken over
+   [MBCODE_START_POS(enc), ~0].  A NULL or empty bbuf yields the full
+   multi-byte range via SET_ALL_MULTI_BYTE_RANGE.
+   Returns 0 on success or the error from add_code_range_to_buf(). */
+static int
+not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
+{
+ int r, i, n;
+ OnigCodePoint pre, from, *data, to = 0;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf)) {
+ set_all:
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ }
+
+ data = (OnigCodePoint* )(bbuf->p);
+ GET_CODE_POINT(n, data);
+ data++;
+ if (n <= 0) goto set_all;
+
+ r = 0;
+ /* pre is the first code point not yet covered by the output */
+ pre = MBCODE_START_POS(enc);
+ for (i = 0; i < n; i++) {
+ from = data[i*2];
+ to = data[i*2+1];
+ /* emit the gap in front of this range, if there is one */
+ if (pre <= from - 1) {
+ r = add_code_range_to_buf(pbuf, pre, from - 1);
+ if (r != 0) return r;
+ }
+ /* stop before to + 1 would wrap around */
+ if (to == ~((OnigCodePoint )0)) break;
+ pre = to + 1;
+ }
+ /* emit the gap after the last range, up to the largest code point */
+ if (to < ~((OnigCodePoint )0)) {
+ r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
+ }
+ return r;
+}
+
+/* Exchange two (buffer, negation flag) operand pairs in place.  All four
+   arguments must be assignable variables. */
+#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
+ BBuf *tbuf; \
+ int tnot; \
+ tnot = not1; not1 = not2; not2 = tnot; \
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
+} while (0)
+
+/* Build in *pbuf the union of two range lists, each optionally negated
+   (not1 / not2 non-zero).  A NULL buffer stands for an empty list.
+   Returns 0 on success or the error from the helper that failed; *pbuf is
+   NULL when the result is empty. */
+static int
+or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
+ BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+ int r;
+ OnigCodePoint i, n1, *data1;
+ OnigCodePoint from, to;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
+ /* empty OR empty: everything if either side is negated, else nothing */
+ if (not1 != 0 || not2 != 0)
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ return 0;
+ }
+
+ r = 0;
+ /* make sure that, if exactly one buffer is NULL, it is bbuf1 */
+ if (IS_NULL(bbuf2))
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ if (IS_NULL(bbuf1)) {
+ if (not1 != 0) {
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ }
+ else {
+ if (not2 == 0) {
+ return bbuf_clone(pbuf, bbuf2);
+ }
+ else {
+ return not_code_range_buf(enc, bbuf2, pbuf);
+ }
+ }
+ }
+
+ /* from here both buffers exist; put a non-negated operand first when
+    there is one */
+ if (not1 != 0)
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ GET_CODE_POINT(n1, data1);
+ data1++;
+
+ if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
+ r = bbuf_clone(pbuf, bbuf2);
+ }
+ else if (not1 == 0) { /* 1 OR (not 2) */
+ r = not_code_range_buf(enc, bbuf2, pbuf);
+ }
+ if (r != 0) return r;
+
+ /* add every range of operand 1 on top of the base built above */
+ for (i = 0; i < n1; i++) {
+ from = data1[i*2];
+ to = data1[i*2+1];
+ r = add_code_range_to_buf(pbuf, from, to);
+ if (r != 0) return r;
+ }
+ return 0;
+}
+
+/* Add to *pbuf the parts of [from1, to1] that are NOT covered by any of
+   the n ranges in data (pairs of from/to values).
+   Returns 0 on success or the error from add_code_range_to_buf(). */
+static int
+and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
+ OnigCodePoint* data, int n)
+{
+ int i, r;
+ OnigCodePoint from2, to2;
+
+ for (i = 0; i < n; i++) {
+ from2 = data[i*2];
+ to2 = data[i*2+1];
+ if (from2 < from1) {
+ /* range 2 starts before the remaining span */
+ if (to2 < from1) continue;
+ else {
+ from1 = to2 + 1;
+ }
+ }
+ else if (from2 <= to1) {
+ /* range 2 starts inside the remaining span */
+ if (to2 < to1) {
+ if (from1 <= from2 - 1) {
+ r = add_code_range_to_buf(pbuf, from1, from2-1);
+ if (r != 0) return r;
+ }
+ from1 = to2 + 1;
+ }
+ else {
+ to1 = from2 - 1;
+ }
+ }
+ else {
+ /* range 2 starts after the remaining span */
+ from1 = from2;
+ }
+ if (from1 > to1) break;
+ }
+ if (from1 <= to1) {
+ r = add_code_range_to_buf(pbuf, from1, to1);
+ if (r != 0) return r;
+ }
+ return 0;
+}
+
+/* Build in *pbuf the intersection of two range lists, each optionally
+   negated (not1 / not2 non-zero).  A NULL buffer stands for an empty list.
+   Only "1 AND 2" and "1 AND (not 2)" are computed once both buffers exist;
+   when both flags are set nothing is added (and_cclass() routes that case
+   to or_code_range_buf() instead).
+   Returns 0 on success or the error from the helper that failed; *pbuf is
+   NULL when the result is empty. */
+static int
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+ int r;
+ OnigCodePoint i, j, n1, n2, *data1, *data2;
+ OnigCodePoint from, to, from1, to1, from2, to2;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1)) {
+ if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
+ return bbuf_clone(pbuf, bbuf2);
+ return 0;
+ }
+ else if (IS_NULL(bbuf2)) {
+ if (not2 != 0)
+ return bbuf_clone(pbuf, bbuf1);
+ return 0;
+ }
+
+ /* put a non-negated operand first when there is one */
+ if (not1 != 0)
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ data2 = (OnigCodePoint* )(bbuf2->p);
+ GET_CODE_POINT(n1, data1);
+ GET_CODE_POINT(n2, data2);
+ data1++;
+ data2++;
+
+ if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ for (j = 0; j < n2; j++) {
+ from2 = data2[j*2];
+ to2 = data2[j*2+1];
+ /* ranges are sorted: nothing further in list 2 can overlap */
+ if (from2 > to1) break;
+ if (to2 < from1) continue;
+ from = MAX(from1, from2);
+ to = MIN(to1, to2);
+ r = add_code_range_to_buf(pbuf, from, to);
+ if (r != 0) return r;
+ }
+ }
+ }
+ else if (not1 == 0) { /* 1 AND (not 2) */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ r = and_code_range1(pbuf, from1, to1, data2, n2);
+ if (r != 0) return r;
+ }
+ }
+
+ return 0;
+}
+
+/* Replace dest with the AND of dest and cc.  cc is not modified.
+   The bit sets are combined on their effective (negation applied) values
+   and the result is stored back in dest's own polarity.  For encodings
+   that are not single-byte the multi-byte range buffers are combined too
+   and dest's old buffer is freed.
+   Returns 0 on success or the error from the range-buffer helpers. */
+static int
+and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+ int r, not1, not2;
+ BBuf *buf1, *buf2, *pbuf;
+ BitSetRef bsr1, bsr2;
+ BitSet bs1, bs2;
+
+ not1 = IS_CCLASS_NOT(dest);
+ bsr1 = dest->bs;
+ buf1 = dest->mbuf;
+ not2 = IS_CCLASS_NOT(cc);
+ bsr2 = cc->bs;
+ buf2 = cc->mbuf;
+
+ /* work on inverted copies for negated classes */
+ if (not1 != 0) {
+ bitset_invert_to(bsr1, bs1);
+ bsr1 = bs1;
+ }
+ if (not2 != 0) {
+ bitset_invert_to(bsr2, bs2);
+ bsr2 = bs2;
+ }
+ bitset_and(bsr1, bsr2);
+ if (bsr1 != dest->bs) {
+ bitset_copy(dest->bs, bsr1);
+ bsr1 = dest->bs;
+ }
+ /* dest keeps its NOT flag, so store the result inverted again */
+ if (not1 != 0) {
+ bitset_invert(dest->bs);
+ }
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ /* (not 1) AND (not 2) == not (1 OR 2); dest stays negated */
+ r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
+ }
+ else {
+ r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+ if (r == 0 && not1 != 0) {
+ /* dest stays negated, so store the complement of the result */
+ BBuf *tbuf;
+ r = not_code_range_buf(enc, pbuf, &tbuf);
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
+ bbuf_free(pbuf);
+ pbuf = tbuf;
+ }
+ }
+ if (r != 0) return r;
+
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
+ }
+ return 0;
+}
+
+/* Replace dest with the OR of dest and cc.  cc is not modified.
+   The bit sets are combined on their effective (negation applied) values
+   and the result is stored back in dest's own polarity.  For encodings
+   that are not single-byte the multi-byte range buffers are combined too
+   and dest's old buffer is freed.
+   Returns 0 on success or the error from the range-buffer helpers. */
+static int
+or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+ int r, not1, not2;
+ BBuf *buf1, *buf2, *pbuf;
+ BitSetRef bsr1, bsr2;
+ BitSet bs1, bs2;
+
+ not1 = IS_CCLASS_NOT(dest);
+ bsr1 = dest->bs;
+ buf1 = dest->mbuf;
+ not2 = IS_CCLASS_NOT(cc);
+ bsr2 = cc->bs;
+ buf2 = cc->mbuf;
+
+ /* work on inverted copies for negated classes */
+ if (not1 != 0) {
+ bitset_invert_to(bsr1, bs1);
+ bsr1 = bs1;
+ }
+ if (not2 != 0) {
+ bitset_invert_to(bsr2, bs2);
+ bsr2 = bs2;
+ }
+ bitset_or(bsr1, bsr2);
+ if (bsr1 != dest->bs) {
+ bitset_copy(dest->bs, bsr1);
+ bsr1 = dest->bs;
+ }
+ /* dest keeps its NOT flag, so store the result inverted again */
+ if (not1 != 0) {
+ bitset_invert(dest->bs);
+ }
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ /* (not 1) OR (not 2) == not (1 AND 2); dest stays negated */
+ r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+ }
+ else {
+ r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
+ if (r == 0 && not1 != 0) {
+ /* dest stays negated, so store the complement of the result */
+ BBuf *tbuf;
+ r = not_code_range_buf(enc, pbuf, &tbuf);
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
+ bbuf_free(pbuf);
+ pbuf = tbuf;
+ }
+ }
+ if (r != 0) return r;
+
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
+ }
+ else
+ return 0;
+}
+
+/* Translate the character following a backslash into the control
+   character it denotes (n, t, r, f, a, b, e, and v when the syntax has
+   ONIG_SYN_OP2_ESC_V_VTAB).  Any other character, or any character at all
+   when the syntax lacks ONIG_SYN_OP_ESC_CONTROL_CHARS, is returned
+   unchanged. */
+static int
+conv_backslash_value(int c, ScanEnv* env)
+{
+  if (! IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS))
+    return c;
+
+  switch (c) {
+  case 'n': return '\n';
+  case 't': return '\t';
+  case 'r': return '\r';
+  case 'f': return '\f';
+  case 'a': return '\007';
+  case 'b': return '\010';
+  case 'e': return '\033';
+
+  case 'v':
+    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
+      return '\v';
+    return c;
+
+  default:
+    return c;
+  }
+}
+
+/* Returns 1 when node must not be the target of a quantifier, 0 otherwise.
+   An anchor is invalid; an option effect is judged by its target; an
+   alternation is invalid as soon as one alternative is invalid. */
+static int
+is_invalid_quantifier_target(Node* node)
+{
+ switch (NTYPE(node)) {
+ case N_ANCHOR:
+ return 1;
+ break;
+
+ case N_EFFECT:
+ if (NEFFECT(node).type == EFFECT_OPTION)
+ return is_invalid_quantifier_target(NEFFECT(node).target);
+ break;
+
+ case N_LIST: /* ex. (?:\G\A)* */
+ /* NOTE(review): every path through this case yields 0 -- the loop
+    returns 0 on the first valid element and the statement after the loop
+    returns 0 as well -- so a list is never reported invalid.  Confirm
+    this is intended before changing it. */
+ do {
+ if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ return 0;
+ break;
+
+ case N_ALT: /* ex. (?:abc|\A)* */
+ do {
+ if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
+ } while (IS_NOT_NULL(node = NCONS(node).right));
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
+/* Classify a quantifier as one of the six common forms and return its
+   index: 0 '?', 1 '*', 2 '+' for greedy quantifiers and 3 '??', 4 '*?',
+   5 '+?' for non-greedy ones.  Returns -1 for any other quantifier. */
+static int
+popular_quantifier_num(QuantifierNode* qf)
+{
+  int form;
+
+  if (qf->lower == 0) {
+    if (qf->upper == 1)
+      form = 0;                       /* {0,1} */
+    else if (IS_REPEAT_INFINITE(qf->upper))
+      form = 1;                       /* {0,}  */
+    else
+      return -1;
+  }
+  else if (qf->lower == 1 && IS_REPEAT_INFINITE(qf->upper)) {
+    form = 2;                         /* {1,}  */
+  }
+  else {
+    return -1;
+  }
+
+  /* the non-greedy forms follow the greedy ones in the same order */
+  return qf->greedy ? form : form + 3;
+}
+
+
+/* Actions onig_reduce_nested_quantifier() can take for a quantifier that
+   is applied directly to another quantifier. */
+enum ReduceType {
+ RQ_ASIS = 0, /* as is */
+ RQ_DEL = 1, /* delete parent */
+ RQ_A, /* to '*' */
+ RQ_AQ, /* to '*?' */
+ RQ_QQ, /* to '??' */
+ RQ_P_QQ, /* to '+)??' */
+ RQ_PQ_Q /* to '+?)?' */
+};
+
+ /*
+ * Reduction to apply for each pair of nested quantifiers.
+ * Indexed as ReduceTypeTable[child][parent]; both indexes are the values
+ * returned by popular_quantifier_num() (?:0, *:1, +:2, ??:3, *?:4, +?:5).
+ * The comment at the end of each row names the child (inner) quantifier.
+ */
+ static enum ReduceType ReduceTypeTable[6][6] = {
+ {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
+ {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
+ {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
+ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
+ };
+
+ /*
+ * Simplify a quantifier (cnode) nested directly inside another quantifier
+ * (pnode), e.g. (?:a*)+ => a*.
+ *
+ * pnode : outer quantifier node; rewritten in place
+ * cnode : inner quantifier node; freed when it is merged into pnode,
+ *         kept (and made pnode's target) otherwise
+ *
+ * Both quantifiers must be one of the six forms recognised by
+ * popular_quantifier_num(). If either is not, nothing is changed and the
+ * function returns immediately; in particular pnode's target is not set.
+ */
+ extern void
+ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
+ {
+ int pnum, cnum;
+ QuantifierNode *p, *c;
+
+ p = &(NQUANTIFIER(pnode));
+ c = &(NQUANTIFIER(cnode));
+ pnum = popular_quantifier_num(p);
+ cnum = popular_quantifier_num(c);
+
+ /* popular_quantifier_num() returns -1 for any other quantifier;
+ using that as an index would read outside ReduceTypeTable */
+ if (pnum < 0 || cnum < 0) return ;
+
+ switch(ReduceTypeTable[cnum][pnum]) {
+ case RQ_DEL:
+ /* parent becomes a copy of the child (including its target) */
+ *p = *c;
+ break;
+ case RQ_A:
+ p->target = c->target;
+ p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
+ break;
+ case RQ_AQ:
+ p->target = c->target;
+ p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
+ break;
+ case RQ_QQ:
+ p->target = c->target;
+ p->lower = 0; p->upper = 1; p->greedy = 0;
+ break;
+ case RQ_P_QQ:
+ /* keep both nodes: child becomes '+', parent becomes '??' */
+ p->target = cnode;
+ p->lower = 0; p->upper = 1; p->greedy = 0;
+ c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
+ return ;
+ break;
+ case RQ_PQ_Q:
+ /* keep both nodes: child becomes '+?', parent becomes '?' */
+ p->target = cnode;
+ p->lower = 0; p->upper = 1; p->greedy = 1;
+ c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
+ return ;
+ break;
+ case RQ_ASIS:
+ p->target = cnode;
+ return ;
+ break;
+ }
+
+ /* the child was merged into the parent: detach its target (now owned by
+ the parent) before freeing the child node */
+ c->target = NULL_NODE;
+ onig_node_free(cnode);
+ }
+
+
+ /*
+ * Token kinds produced by fetch_token() and fetch_token_in_cc().
+ * The kinds listed after the "in cc" marker are set by fetch_token_in_cc().
+ */
+ enum TokenSyms {
+ TK_EOT = 0, /* end of token */
+ TK_RAW_BYTE = 1,
+ TK_CHAR,
+ TK_STRING,
+ TK_CODE_POINT,
+ TK_ANYCHAR,
+ TK_CHAR_TYPE,
+ TK_BACKREF,
+ TK_CALL,
+ TK_ANCHOR,
+ TK_OP_REPEAT,
+ TK_INTERVAL,
+ TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
+ TK_ALT,
+ TK_SUBEXP_OPEN,
+ TK_SUBEXP_CLOSE,
+ TK_CC_OPEN,
+ TK_QUOTE_OPEN,
+ TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
+ /* in cc */
+ TK_CC_CLOSE,
+ TK_CC_RANGE,
+ TK_POSIX_BRACKET_OPEN,
+ TK_CC_AND, /* && */
+ TK_CC_CC_OPEN /* [ */
+ };
+
+ /*
+ * One token of the pattern, filled in by fetch_token() or
+ * fetch_token_in_cc(). Which member of the union u is meaningful depends
+ * on type.
+ */
+ typedef struct {
+ enum TokenSyms type;
+ int escaped; /* 1 when the token was introduced by the escape character */
+ int base; /* is number: 8, 16 (used in [....]) */
+ UChar* backp; /* position in the pattern saved by the tokenizer */
+ union {
+ UChar* s;
+ int c; /* TK_CHAR, TK_STRING, TK_RAW_BYTE */
+ OnigCodePoint code; /* TK_CODE_POINT */
+ int anchor; /* TK_ANCHOR */
+ int subtype; /* TK_CHAR_TYPE (CTYPE_*); also written for TK_ANCHOR */
+ struct {
+ int lower;
+ int upper;
+ int greedy;
+ int possessive;
+ } repeat; /* TK_OP_REPEAT, TK_INTERVAL */
+ struct {
+ int num; /* number of referenced groups */
+ int ref1; /* the group number when num == 1 */
+ int* refs; /* the group numbers when num > 1 */
+ int by_name; /* 1 for \k<name>, 0 for a numbered backref */
+ #ifdef USE_BACKREF_AT_LEVEL
+ int exist_level;
+ int level; /* \k<name+n> */
+ #endif
+ } backref; /* TK_BACKREF */
+ struct {
+ UChar* name;
+ UChar* name_end;
+ } call; /* TK_CALL */
+ struct {
+ int not; /* 1 when the property is negated */
+ } prop; /* TK_CHAR_PROPERTY */
+ } u;
+ } OnigToken;
+
+
+ /*
+ * Parse the body of an interval quantifier; *src points just after '{'.
+ *
+ * src : in/out read position; advanced past '}' only on success (0 or 2)
+ * end : end of the pattern
+ * tok : receives type TK_INTERVAL and u.repeat.lower/upper on success
+ * env : scan environment (syntax and encoding)
+ *
+ * Returns
+ *   0  : {n,m}, {n,} or {,n}
+ *   2  : {n} (fixed count)
+ *   1  : not a valid interval, but the syntax allows that; the caller
+ *        treats '{' as an ordinary character (*src and tok are unchanged)
+ *   <0 : ONIGERR_* error code
+ */
+ static int
+ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+ {
+ int low, up, syn_allow, non_low = 0;
+ int r = 0;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
+
+ if (PEND) {
+ if (syn_allow)
+ return 1; /* "....{" : OK! */
+ else
+ return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
+ }
+
+ if (! syn_allow) {
+ c = PPEEK;
+ if (c == ')' || c == '(' || c == '|') {
+ return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
+ }
+ }
+
+ low = onig_scan_unsigned_number(&p, end, env->enc);
+ if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ if (low > ONIG_MAX_REPEAT_NUM)
+ return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+ if (p == *src) { /* can't read low */
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
+ /* allow {,n} as {0,n} */
+ low = 0;
+ non_low = 1;
+ }
+ else
+ goto invalid;
+ }
+
+ if (PEND) goto invalid;
+ PFETCH(c);
+ if (c == ',') {
+ UChar* prev = p;
+ up = onig_scan_unsigned_number(&p, end, env->enc);
+ if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ if (up > ONIG_MAX_REPEAT_NUM)
+ return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+ if (p == prev) {
+ /* "{,}" : neither bound given */
+ if (non_low != 0)
+ goto invalid;
+ up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ }
+ }
+ else {
+ /* "{" followed by something other than a number or ',' */
+ if (non_low != 0)
+ goto invalid;
+
+ PUNFETCH;
+ up = low; /* {n} : exact n times */
+ r = 2; /* fixed */
+ }
+
+ if (PEND) goto invalid;
+ PFETCH(c);
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
+ /* \{n,m\} syntax: the closing brace must be escaped too; the pattern
+ must not end right after the escape character */
+ if (c != MC_ESC(enc) || PEND) goto invalid;
+ PFETCH(c);
+ }
+ if (c != '}') goto invalid;
+
+ if (!IS_REPEAT_INFINITE(up) && low > up) {
+ return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+ }
+
+ tok->type = TK_INTERVAL;
+ tok->u.repeat.lower = low;
+ tok->u.repeat.upper = up;
+ *src = p;
+ return r; /* 0: normal {n,m}, 2: fixed {n} */
+
+ invalid:
+ if (syn_allow)
+ return 1; /* OK */
+ else
+ return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
+ }
+
+ /* \M-, \C-, \c, or \... */
+ /*
+ * Read the value of an escape sequence; *src points just after the escape
+ * character.
+ *
+ * Handles \M-x (meta), \C-x and \cx (control) when the syntax enables them,
+ * and passes everything else through conv_backslash_value(). The character
+ * after \M-, \C- or \c may itself be an escape sequence, which is resolved
+ * recursively.
+ *
+ * On success *src is advanced past the sequence and the resulting code is
+ * returned; on failure a negative ONIGERR_* code is returned and *src is
+ * left unchanged.
+ */
+ static int
+ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
+ {
+ int v;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ PFETCH(c);
+ switch (c) {
+ case 'M':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_META_CODE_SYNTAX;
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
+ /* meta: keep the low byte and set its top bit */
+ c = ((c & 0xff) | 0x80);
+ }
+ else
+ goto backslash;
+ break;
+
+ case 'C':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
+ /* \C-x shares the rest of its handling with \cx */
+ goto control;
+ }
+ else
+ goto backslash;
+
+ case 'c':
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
+ control:
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c == '?') {
+ c = 0177;
+ }
+ else {
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
+ /* control: clear bits 0x60 (and everything above bit 7) */
+ c &= 0x9f;
+ }
+ break;
+ }
+ /* fall through */
+ /* (\c is not a control escape in this syntax: treat 'c' like any other char) */
+
+ default:
+ {
+ backslash:
+ c = conv_backslash_value(c, env);
+ }
+ break;
+ }
+
+ *src = p;
+ return c;
+ }
+
+static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+
+#ifdef USE_NAMED_GROUP
+#ifdef USE_BACKREF_AT_LEVEL
+/*
+ \k<name+n>, \k<name-n>
+*/
+static int
+fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
+ , ScanEnv* env, int* level)
+{
+ int r, exist_level = 0;
+ OnigCodePoint c = 0;
+ OnigCodePoint first_code;
+ OnigEncoding enc = env->enc;
+ UChar *name_end;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ name_end = end;
+ r = 0;
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ first_code = c;
+ if (c == '>')
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == '>' || c == ')' || c == '+' || c == '-') break;
+
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ if (c != '>') {
+ if (c == '+' || c == '-') {
+ int num;
+ int flag = (c == '-' ? -1 : 1);
+
+ PFETCH(c);
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
+ PUNFETCH;
+ num = onig_scan_unsigned_number(&p, end, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ *level = (num * flag);
+ exist_level = 1;
+
+ PFETCH(c);
+ if (c == '>')
+ goto first_check;
+ }
+
+ err:
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ else {
+ first_check:
+ if (ONIGENC_IS_CODE_ASCII(first_code) &&
+ ONIGENC_IS_CODE_UPPER(enc, first_code))
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return (exist_level ? 1 : 0);
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
+/*
+ def: 0 -> define name (don't allow number name)
+ 1 -> reference name (allow number name)
+*/
+static int
+fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+{
+ int r, is_num;
+ OnigCodePoint c = 0;
+ OnigCodePoint first_code;
+ OnigEncoding enc = env->enc;
+ UChar *name_end;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ name_end = end;
+ r = 0;
+ is_num = 0;
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ first_code = c;
+ if (c == '>')
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (ref == 1)
+ is_num = 1;
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == '>' || c == ')') break;
+
+ if (is_num == 1) {
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (!ONIGENC_IS_CODE_WORD(enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ else
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ else {
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+ }
+
+ if (c != '>') {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ else {
+ if (ONIGENC_IS_CODE_ASCII(first_code) &&
+ ONIGENC_IS_CODE_UPPER(enc, first_code))
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#else
+ /*
+ * Variant used when named groups are not compiled in: the "name" must be
+ * a sequence of single-byte digits terminated by '>'. *src points just
+ * after '<'. The parameter `ref` is not used by this variant.
+ *
+ * On success returns 0, stores the position of '>' in *rname_end and
+ * advances *src past it. On failure records the offending text in env
+ * and returns a negative ONIGERR_* code; *src and *rname_end are left
+ * unchanged.
+ */
+ static int
+ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+ {
+ int r;
+ OnigCodePoint c = 0;
+ UChar *name_end = end;
+ OnigEncoding enc = env->enc;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ r = 0;
+ while (!PEND) {
+ name_end = p;
+ /* multi-byte characters are never valid here */
+ if (enc_len(enc, p) > 1)
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+
+ PFETCH(c);
+ if (c == '>' || c == ')') break;
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ if (c != '>') {
+ /* ended on ')' or ran out of pattern without finding '>' */
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+ }
+#endif
+
+ /*
+ * Warn that the character-class operator c appears unescaped inside a
+ * character class. Nothing is reported when no warning callback is
+ * installed, or unless the syntax both requests this warning and treats
+ * backslash as an escape inside a class.
+ */
+ static void
+ CC_ESC_WARN(ScanEnv* env, UChar *c)
+ {
+ UChar msg[WARN_BUFSIZE];
+
+ if (onig_warn == onig_null_warn) return ;
+ if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+ if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) return ;
+
+ onig_snprintf_with_pattern(msg, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"character class has '%s' without escape", c);
+ (*onig_warn)((char* )msg);
+ }
+
+ /*
+ * Warn that the character c appears unescaped outside a character class.
+ * Nothing is reported when no warning callback is installed or the syntax
+ * does not request this warning.
+ */
+ static void
+ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
+ {
+ UChar msg[WARN_BUFSIZE];
+
+ if (onig_warn == onig_null_warn) return ;
+ if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+
+ onig_snprintf_with_pattern(msg, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"regular expression has '%s' without escape", c);
+ (*onig_warn)((char* )msg);
+ }
+
+ /*
+ * Search [from, to) for the code point sequence s[0..n-1].
+ *
+ * Returns the position where the sequence starts, or NULL_UCHARP when it
+ * is not found. When next is not null, *next receives the position just
+ * after the match.
+ *
+ * After a partial match the search resumes behind the characters that
+ * matched, not at the character following the attempted start.
+ */
+ static UChar*
+ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
+ UChar **next, OnigEncoding enc)
+ {
+ UChar *start = from;
+
+ while (start < to) {
+ OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, start, to);
+ UChar *scan = start + enc_len(enc, start);
+
+ if (code == s[0]) {
+ int matched = 1;
+
+ while (matched < n && scan < to) {
+ code = ONIGENC_MBC_TO_CODE(enc, scan, to);
+ if (code != s[matched]) break;
+ scan += enc_len(enc, scan);
+ matched++;
+ }
+ if (matched >= n) {
+ if (IS_NOT_NULL(next))
+ *next = scan;
+ return start;
+ }
+ }
+ start = scan;
+ }
+ return NULL_UCHARP;
+ }
+
+ /*
+ * Check whether the code point sequence s[0..n-1] occurs in [from, to)
+ * before the code point `bad` is met.
+ *
+ * A character following the escape character is skipped without being
+ * examined. Returns 1 when the sequence is found, 0 when `bad` is seen
+ * first or the range is exhausted.
+ */
+ static int
+ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
+ OnigCodePoint bad, OnigEncoding enc)
+ {
+ int escaped = 0;
+ UChar *cur = from;
+
+ while (cur < to) {
+ OnigCodePoint code;
+ UChar *after;
+
+ if (escaped) {
+ /* the escaped character is never inspected */
+ escaped = 0;
+ cur += enc_len(enc, cur);
+ continue;
+ }
+
+ code = ONIGENC_MBC_TO_CODE(enc, cur, to);
+ after = cur + enc_len(enc, cur);
+
+ if (code == s[0]) {
+ UChar *scan = after;
+ int matched = 1;
+
+ while (matched < n && scan < to) {
+ code = ONIGENC_MBC_TO_CODE(enc, scan, to);
+ if (code != s[matched]) break;
+ scan += enc_len(enc, scan);
+ matched++;
+ }
+ if (matched >= n) return 1;
+ }
+ else if (code == bad) {
+ return 0;
+ }
+ else if (code == MC_ESC(enc)) {
+ escaped = 1;
+ }
+
+ /* always step by exactly one character, even after a partial match */
+ cur = after;
+ }
+ return 0;
+ }
+
+ /*
+ * Read the next token inside a character class ([...]).
+ *
+ * tok : receives the token
+ * src : in/out read position; advanced past the token on success
+ * end : end of the pattern
+ * env : scan environment (syntax, encoding)
+ *
+ * Returns tok->type (TK_EOT at the end of the pattern) or a negative
+ * ONIGERR_* code. On error *src is left unchanged.
+ *
+ * Every look-ahead is preceded by an end-of-pattern test so that nothing
+ * is read at or beyond `end`.
+ */
+ static int
+ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+ {
+ int num;
+ OnigCodePoint c, c2;
+ OnigSyntaxType* syn = env->syntax;
+ OnigEncoding enc = env->enc;
+ UChar* prev;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ if (PEND) {
+ tok->type = TK_EOT;
+ return tok->type;
+ }
+
+ PFETCH(c);
+ tok->type = TK_CHAR;
+ tok->base = 0;
+ tok->u.c = c;
+ tok->escaped = 0;
+
+ if (c == ']') {
+ tok->type = TK_CC_CLOSE;
+ }
+ else if (c == '-') {
+ tok->type = TK_CC_RANGE;
+ }
+ else if (c == MC_ESC(enc)) {
+ /* backslash is an ordinary character in a class for some syntaxes */
+ if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
+ goto end;
+
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ PFETCH(c);
+ tok->escaped = 1;
+ tok->u.c = c;
+ switch (c) {
+ case 'w':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WORD;
+ break;
+ case 'W':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WORD;
+ break;
+ case 'd':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_DIGIT;
+ break;
+ case 'D':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_DIGIT;
+ break;
+ case 's':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WHITE_SPACE;
+ break;
+ case 'S':
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ break;
+ case 'h':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_XDIGIT;
+ break;
+ case 'H':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_XDIGIT;
+ break;
+
+ case 'p':
+ case 'P':
+ /* "\p" at the very end of the pattern: plain character, nothing to peek */
+ if (PEND) break;
+
+ c2 = PPEEK;
+ if (c2 == '{' &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ /* \p{^...} inverts the sense; only look for '^' if input remains */
+ if (!PEND &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
+ }
+ break;
+
+ case 'x':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ PINC;
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ /* at least one hex digit must have been read and '}' must follow */
+ if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 16;
+ tok->u.c = num;
+ }
+ break;
+
+ case 'u':
+ if (PEND) break;
+
+ prev = p;
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ break;
+
+ case '0':
+ case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+ /* re-read the first digit as part of the octal number */
+ PUNFETCH;
+ prev = p;
+ num = scan_unsigned_octal_number(&p, end, 3, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 8;
+ tok->u.c = num;
+ }
+ break;
+
+ default:
+ PUNFETCH;
+ num = fetch_escaped_value(&p, end, env);
+ if (num < 0) return num;
+ /* the escape changed the value (e.g. \n): report it as a code point */
+ if (tok->u.c != num) {
+ tok->u.code = (OnigCodePoint )num;
+ tok->type = TK_CODE_POINT;
+ }
+ break;
+ }
+ }
+ else if (c == '[') {
+ /* "[:" may open a POSIX bracket; do not peek when '[' is the last char */
+ if (!PEND && IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) &&
+ (PPEEK_IS(':'))) {
+ OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
+ tok->backp = p; /* position just after the '[' that was read */
+ PINC;
+ /* it is a POSIX bracket only if ":]" follows before any ']' */
+ if (str_exist_check_with_esc(send, 2, p, end,
+ (OnigCodePoint )']', enc)) {
+ tok->type = TK_POSIX_BRACKET_OPEN;
+ }
+ else {
+ PUNFETCH;
+ goto cc_in_cc;
+ }
+ }
+ else {
+ cc_in_cc:
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
+ tok->type = TK_CC_CC_OPEN;
+ }
+ else {
+ CC_ESC_WARN(env, (UChar* )"[");
+ }
+ }
+ }
+ else if (c == '&') {
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
+ !PEND && (PPEEK_IS('&'))) {
+ PINC;
+ tok->type = TK_CC_AND;
+ }
+ }
+
+ end:
+ *src = p;
+ return tok->type;
+ }
+
+ /*
+ * Read the next token outside a character class.
+ *
+ * tok : receives the token
+ * src : in/out read position; advanced past the token on success
+ * end : end of the pattern
+ * env : scan environment (syntax, encoding, options, group table)
+ *
+ * Returns tok->type (TK_EOT at the end of the pattern) or a negative
+ * ONIGERR_* code. On error *src is left unchanged.
+ *
+ * Comment groups (?#...) and, in extended mode, white space and #-comments
+ * are skipped. Every look-ahead is preceded by an end-of-pattern test so
+ * that nothing is read at or beyond `end`.
+ */
+ static int
+ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+ {
+ int r, num;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ OnigSyntaxType* syn = env->syntax;
+ UChar* prev;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ start:
+ if (PEND) {
+ tok->type = TK_EOT;
+ return tok->type;
+ }
+
+ tok->type = TK_STRING;
+ tok->base = 0;
+ tok->backp = p;
+
+ PFETCH(c);
+ if (IS_MC_ESC_CODE(c, enc, syn)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ tok->backp = p;
+ PFETCH(c);
+
+ tok->u.c = c;
+ tok->escaped = 1;
+ switch (c) {
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ /* shared by all repeat operators: a following '?' makes the operator
+ non-greedy, a following '+' makes it possessive */
+ greedy_check:
+ if (!PEND && PPEEK_IS('?') &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 0;
+ tok->u.repeat.possessive = 0;
+ }
+ else {
+ possessive_check:
+ if (!PEND && PPEEK_IS('+') &&
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
+ tok->type != TK_INTERVAL) ||
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
+ tok->type == TK_INTERVAL))) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 1;
+ }
+ else {
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 0;
+ }
+ }
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
+ r = fetch_range_quantifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
+ goto greedy_check;
+ }
+ /* r == 1 : normal char */
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case 'w':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WORD;
+ break;
+
+ case 'W':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WORD;
+ break;
+
+ case 'b':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BOUND;
+ break;
+
+ case 'B':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
+ break;
+
+ #ifdef USE_WORD_BEGIN_END
+ case '<':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BEGIN;
+ break;
+
+ case '>':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_END;
+ break;
+ #endif
+
+ case 's':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_WHITE_SPACE;
+ break;
+
+ case 'S':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+ break;
+
+ case 'd':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_DIGIT;
+ break;
+
+ case 'D':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_DIGIT;
+ break;
+
+ case 'h':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_XDIGIT;
+ break;
+
+ case 'H':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_XDIGIT;
+ break;
+
+ case 'A':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ begin_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_BUF;
+ break;
+
+ case 'Z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_SEMI_END_BUF;
+ break;
+
+ case 'z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ end_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_END_BUF;
+ break;
+
+ case 'G':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_POSITION;
+ break;
+
+ case '`':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto begin_buf;
+ break;
+
+ case '\'':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto end_buf;
+ break;
+
+ case 'x':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ PINC;
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ /* at least one hex digit must have been read and '}' must follow */
+ if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 16;
+ tok->u.c = num;
+ }
+ break;
+
+ case 'u':
+ if (PEND) break;
+
+ prev = p;
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ break;
+
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* decimal back reference, otherwise an octal escape or a plain char */
+ PUNFETCH;
+ prev = p;
+ num = onig_scan_unsigned_number(&p, end, enc);
+ if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
+ goto skip_backref;
+ }
+
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
+ (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = num;
+ tok->u.backref.by_name = 0;
+ #ifdef USE_BACKREF_AT_LEVEL
+ tok->u.backref.exist_level = 0;
+ #endif
+ break;
+ }
+
+ skip_backref:
+ if (c == '8' || c == '9') {
+ /* normal char */
+ p = prev; PINC;
+ break;
+ }
+
+ p = prev;
+ /* fall through */
+ case '0':
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+ prev = p;
+ /* after "\0" at most two more digits are read; otherwise p was reset
+ to the first digit and up to three are read */
+ num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 8;
+ tok->u.c = num;
+ }
+ else if (c != '0') {
+ PINC;
+ }
+ break;
+
+ #ifdef USE_NAMED_GROUP
+ case 'k':
+ /* "\k" at the very end of the pattern: plain character, nothing to fetch */
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
+ PFETCH(c);
+ if (c == '<') {
+ UChar* name_end;
+ int* backs;
+
+ prev = p;
+
+ #ifdef USE_BACKREF_AT_LEVEL
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */
+ r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+ if (r == 1) tok->u.backref.exist_level = 1;
+ else tok->u.backref.exist_level = 0;
+ #else
+ r = fetch_name(&p, end, &name_end, env, 1);
+ #endif
+ if (r < 0) return r;
+
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ if (num <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ int i;
+ for (i = 0; i < num; i++) {
+ if (backs[i] > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 1;
+ if (num == 1) {
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = backs[0];
+ }
+ else {
+ tok->u.backref.num = num;
+ tok->u.backref.refs = backs;
+ }
+ }
+ else
+ PUNFETCH;
+ }
+ break;
+ #endif
+
+ #ifdef USE_SUBEXP_CALL
+ case 'g':
+ /* "\g" at the very end of the pattern: plain character, nothing to fetch */
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
+ PFETCH(c);
+ if (c == '<') {
+ UChar* name_end;
+
+ prev = p;
+ r = fetch_name(&p, end, &name_end, env, 1);
+ if (r < 0) return r;
+
+ tok->type = TK_CALL;
+ tok->u.call.name = prev;
+ tok->u.call.name_end = name_end;
+ }
+ else
+ PUNFETCH;
+ }
+ break;
+ #endif
+
+ case 'Q':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
+ tok->type = TK_QUOTE_OPEN;
+ }
+ break;
+
+ case 'p':
+ case 'P':
+ /* do not peek for '{' when "\p" is the last thing in the pattern */
+ if (!PEND && PPEEK_IS('{') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ /* \p{^...} inverts the sense; only look for '^' if input remains */
+ if (!PEND &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c);
+ if (c == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
+ }
+ break;
+
+ default:
+ PUNFETCH;
+ num = fetch_escaped_value(&p, end, env);
+ if (num < 0) return num;
+ /* set_raw: */
+ if (tok->u.c != num) {
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else { /* string */
+ /* the escape did not change the value: consume just the escaped char */
+ p = tok->backp + enc_len(enc, tok->backp);
+ }
+ break;
+ }
+ }
+ else {
+ tok->u.c = c;
+ tok->escaped = 0;
+
+ #ifdef USE_VARIABLE_META_CHARS
+ if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
+ if (c == MC_ANYCHAR(enc))
+ goto any_char;
+ else if (c == MC_ANYTIME(enc))
+ goto anytime;
+ else if (c == MC_ZERO_OR_ONE_TIME(enc))
+ goto zero_or_one_time;
+ else if (c == MC_ONE_OR_MORE_TIME(enc))
+ goto one_or_more_time;
+ else if (c == MC_ANYCHAR_ANYTIME(enc)) {
+ tok->type = TK_ANYCHAR_ANYTIME;
+ goto out;
+ }
+ }
+ #endif
+
+ switch (c) {
+ case '.':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+ #ifdef USE_VARIABLE_META_CHARS
+ any_char:
+ #endif
+ tok->type = TK_ANYCHAR;
+ break;
+
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+ #ifdef USE_VARIABLE_META_CHARS
+ anytime:
+ #endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+ #ifdef USE_VARIABLE_META_CHARS
+ one_or_more_time:
+ #endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+ #ifdef USE_VARIABLE_META_CHARS
+ zero_or_one_time:
+ #endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ goto greedy_check;
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
+ r = fetch_range_quantifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
+ goto greedy_check;
+ }
+ /* r == 1 : normal char */
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ /* "(?#...)" is a comment group; never peek past the end of the pattern */
+ if (!PEND && PPEEK_IS('?') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ PINC;
+ if (!PEND && PPEEK_IS('#')) {
+ PFETCH(c);
+ /* skip to the closing ')'; an escaped character never closes it */
+ while (1) {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c == MC_ESC(enc)) {
+ if (!PEND) PFETCH(c);
+ }
+ else {
+ if (c == ')') break;
+ }
+ }
+ goto start;
+ }
+ /* not a comment: put the '?' back for the group parser */
+ PUNFETCH;
+ }
+
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case '^':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
+ break;
+
+ case '$':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
+ break;
+
+ case '[':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
+ tok->type = TK_CC_OPEN;
+ break;
+
+ case ']':
+ if (*src > env->pattern) /* /].../ is allowed. */
+ CCEND_ESC_WARN(env, (UChar* )"]");
+ break;
+
+ case '#':
+ if (IS_EXTEND(env->option)) {
+ /* extended mode: '#' starts a comment that runs to the end of the line */
+ while (!PEND) {
+ PFETCH(c);
+ if (ONIGENC_IS_CODE_NEWLINE(enc, c))
+ break;
+ }
+ goto start;
+ break;
+ }
+ break;
+
+ case ' ': case '\t': case '\n': case '\r': case '\f':
+ /* extended mode: unescaped white space is ignored */
+ if (IS_EXTEND(env->option))
+ goto start;
+ break;
+
+ default:
+ /* string */
+ break;
+ }
+ }
+
+ #ifdef USE_VARIABLE_META_CHARS
+ out:
+ #endif
+ *src = p;
+ return tok->type;
+ }
+
+ /*
+ * Add a character type, given as two code range tables, to a character
+ * class.
+ *
+ * cc    : character class to extend (bit set cc->bs and range buffer cc->mbuf)
+ * ctype : not used by this function
+ * not   : 0 adds the listed ranges, non-zero adds their complement
+ * enc   : encoding; only its minimum character length is consulted
+ * sbr   : ranges that are entered into the bit set
+ * mbr   : ranges that are entered into the range buffer
+ *
+ * Returns 0, or the non-zero result of add_code_range_to_buf() on failure.
+ *
+ * NOTE(review): in the negated case the bit set is filled up to 0x7e only
+ * and the range buffer resumes at 0x80, so 0x7f is never added unless it
+ * lies before a listed range. Kept as is -- confirm whether it is intended.
+ */
+ static int
+ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
+ const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+ {
+ int idx, rc;
+ OnigCodePoint code;
+ OnigCodePoint gap_start = 0;
+ const int sb_count = ONIGENC_CODE_RANGE_NUM(sbr);
+ const int mb_count = ONIGENC_CODE_RANGE_NUM(mbr);
+
+ if (not == 0) {
+ /* positive class: copy the ranges as they are */
+ for (idx = 0; idx < sb_count; idx++) {
+ code = ONIGENC_CODE_RANGE_FROM(sbr, idx);
+ while (code <= ONIGENC_CODE_RANGE_TO(sbr, idx)) {
+ BITSET_SET_BIT(cc->bs, code);
+ code++;
+ }
+ }
+
+ for (idx = 0; idx < mb_count; idx++) {
+ rc = add_code_range_to_buf(&(cc->mbuf),
+ ONIGENC_CODE_RANGE_FROM(mbr, idx),
+ ONIGENC_CODE_RANGE_TO(mbr, idx));
+ if (rc != 0) return rc;
+ }
+ return 0;
+ }
+
+ /* negated class: add the gaps between consecutive ranges */
+ if (ONIGENC_MBC_MINLEN(enc) == 1) {
+ for (idx = 0; idx < sb_count; idx++) {
+ for (code = gap_start; code < ONIGENC_CODE_RANGE_FROM(sbr, idx); code++) {
+ BITSET_SET_BIT(cc->bs, code);
+ }
+ gap_start = ONIGENC_CODE_RANGE_TO(sbr, idx) + 1;
+ }
+ /* gap after the last range */
+ for (code = gap_start; code < 0x7f; code++) {
+ BITSET_SET_BIT(cc->bs, code);
+ }
+
+ gap_start = 0x80;
+ }
+
+ for (idx = 0; idx < mb_count; idx++) {
+ if (gap_start < ONIGENC_CODE_RANGE_FROM(mbr, idx)) {
+ rc = add_code_range_to_buf(&(cc->mbuf), gap_start,
+ ONIGENC_CODE_RANGE_FROM(mbr, idx) - 1);
+ if (rc != 0) return rc;
+ }
+ gap_start = ONIGENC_CODE_RANGE_TO(mbr, idx) + 1;
+ }
+ /* gap after the last range, up to the largest code point */
+ if (gap_start < 0x7fffffff) {
+ rc = add_code_range_to_buf(&(cc->mbuf), gap_start, 0x7fffffff);
+ if (rc != 0) return rc;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Add all characters of type ctype (or, when not != 0, all characters
+ * that are NOT of that type) to the character class cc.
+ *
+ * If the encoding supplies code range tables for the type, the work is
+ * delegated to add_ctype_to_cc_by_range(). Otherwise every single-byte
+ * code is tested individually and the multi-byte part is handled through
+ * ADD_ALL_MULTI_BYTE_RANGE.
+ *
+ * Returns 0 on success, ONIGERR_PARSER_BUG for an unknown ctype, or the
+ * error reported by the encoding / range helper.
+ *
+ * NOTE(review): ADD_ALL_MULTI_BYTE_RANGE is a macro defined outside this
+ * chunk; it presumably assigns to the local `r` and may return on failure --
+ * confirm before restructuring this function.
+ */
+ static int
+ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
+ {
+ int c, r;
+ const OnigCodePoint *sbr, *mbr;
+ OnigEncoding enc = env->enc;
+
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
+ if (r == 0) {
+ return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
+ }
+ else if (r != ONIG_NO_SUPPORT_CONFIG) {
+ return r;
+ }
+
+ /* no range tables for this type: classify code by code */
+ r = 0;
+ switch (ctype) {
+ /* for these types the multi-byte range is added only to the negated class */
+ case ONIGENC_CTYPE_ALPHA:
+ case ONIGENC_CTYPE_BLANK:
+ case ONIGENC_CTYPE_CNTRL:
+ case ONIGENC_CTYPE_DIGIT:
+ case ONIGENC_CTYPE_LOWER:
+ case ONIGENC_CTYPE_PUNCT:
+ case ONIGENC_CTYPE_SPACE:
+ case ONIGENC_CTYPE_UPPER:
+ case ONIGENC_CTYPE_XDIGIT:
+ case ONIGENC_CTYPE_ASCII:
+ case ONIGENC_CTYPE_ALNUM:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ break;
+
+ /* for these types the multi-byte range is added only to the positive class */
+ case ONIGENC_CTYPE_GRAPH:
+ case ONIGENC_CTYPE_PRINT:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ break;
+
+ /* word: the multi-byte range is added only to the positive class */
+ case ONIGENC_CTYPE_WORD:
+ if (not == 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
+ && ! ONIGENC_IS_CODE_WORD(enc, c))
+ BITSET_SET_BIT(cc->bs, c);
+ }
+ }
+ break;
+
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+
+ return r;
+ }
+
+/*
+ * Translate a parser character type (CTYPE_*) into the encoding level type
+ * (ONIGENC_CTYPE_*).
+ *
+ * On success the encoding type is returned and *not receives 1 for the
+ * CTYPE_NOT_* variants, 0 otherwise.  For an unknown `pctype`
+ * ONIGERR_PARSER_BUG is returned and *not is left untouched.
+ */
+static int
+parse_ctype_to_enc_ctype(int pctype, int* not)
+{
+  int enc_ctype;
+  int negated;
+
+  switch (pctype) {
+  case CTYPE_WORD:
+    enc_ctype = ONIGENC_CTYPE_WORD;   negated = 0;
+    break;
+  case CTYPE_NOT_WORD:
+    enc_ctype = ONIGENC_CTYPE_WORD;   negated = 1;
+    break;
+  case CTYPE_WHITE_SPACE:
+    enc_ctype = ONIGENC_CTYPE_SPACE;  negated = 0;
+    break;
+  case CTYPE_NOT_WHITE_SPACE:
+    enc_ctype = ONIGENC_CTYPE_SPACE;  negated = 1;
+    break;
+  case CTYPE_DIGIT:
+    enc_ctype = ONIGENC_CTYPE_DIGIT;  negated = 0;
+    break;
+  case CTYPE_NOT_DIGIT:
+    enc_ctype = ONIGENC_CTYPE_DIGIT;  negated = 1;
+    break;
+  case CTYPE_XDIGIT:
+    enc_ctype = ONIGENC_CTYPE_XDIGIT; negated = 0;
+    break;
+  case CTYPE_NOT_XDIGIT:
+    enc_ctype = ONIGENC_CTYPE_XDIGIT; negated = 1;
+    break;
+  default:
+    return ONIGERR_PARSER_BUG;
+  }
+
+  *not = negated;
+  return enc_ctype;
+}
+/* One row of a name -> character type lookup table (see the PBS tables). */
+typedef struct {
+ UChar *name; /* class name compared against the pattern text; NULL in the terminating row */
+ int ctype; /* ONIGENC_CTYPE_* value for that name; -1 in the terminating row */
+ short int len; /* length of name as passed to the comparison */
+} PosixBracketEntryType;
+/*
+ * Try to read a POSIX bracket name followed by ":]" (for example "alpha:]",
+ * optionally preceded by '^' for negation) at *src and add that class to cc.
+ *
+ * Returns 0 when a bracket was recognised; *src is then moved past ":]".
+ * Returns 1 when the text is not a POSIX bracket; *src is left unchanged.
+ * Returns ONIGERR_INVALID_POSIX_BRACKET_TYPE when a known name is not
+ * followed by ":]", or when an unknown name is followed by ":]" within
+ * POSIX_BRACKET_CHECK_LIMIT_LENGTH characters.
+ * Any non-zero result of add_ctype_to_cc() is passed through.
+ */
+static int
+parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+{
+#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
+#define POSIX_BRACKET_NAME_MAX_LEN 6
+
+ static PosixBracketEntryType PBS[] = {
+ { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )NULL, -1, 0 }
+ };
+
+ PosixBracketEntryType *pb;
+ int not, i, r;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar *p = *src; /* local cursor; *src is only written on success */
+ PFETCH_READY;
+
+ if (PPEEK_IS('^')) {
+ PINC;
+ not = 1;
+ }
+ else
+ not = 0;
+
+ if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2) /* fewer characters left than the longest name plus ":]" */
+ goto not_posix_bracket;
+
+ for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
+ p = (UChar* )onigenc_step(enc, p, end, pb->len);
+ if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+
+ r = add_ctype_to_cc(cc, pb->ctype, not, env);
+ if (r != 0) return r;
+
+ PINC; PINC; /* step over ':' and ']' */
+ *src = p;
+ return 0;
+ }
+ }
+
+ not_posix_bracket:
+ c = 0;
+ i = 0;
+ while (!PEND && ((c = PPEEK) != ':') && c != ']') { /* look ahead for ':' or ']', bounded by the check limit */
+ PINC;
+ if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
+ }
+ if (c == ':' && ! PEND) {
+ PINC;
+ if (! PEND) {
+ PFETCH(c);
+ if (c == ']') /* "name:]" with a name that is not in PBS */
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+ }
+ }
+
+ return 1; /* 1: is not POSIX bracket, but no error. */
+}
+
+/*
+ * Look up the property name spanning [p, end) and return its
+ * ONIGENC_CTYPE_* value.  The name must match a table entry over the
+ * entry's whole length and have exactly that length.
+ *
+ * Returns -1 when no entry matches.
+ */
+static int
+property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
+{
+  static PosixBracketEntryType PropertyNames[] = {
+    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
+    { (UChar* )NULL,     -1,                   0 }
+  };
+
+  int idx;
+  int name_len = onigenc_strlen(enc, p, end);
+
+  for (idx = 0; IS_NOT_NULL(PropertyNames[idx].name); idx++) {
+    PosixBracketEntryType *entry = &PropertyNames[idx];
+
+    if (name_len != entry->len)
+      continue;
+    if (onigenc_with_ascii_strncmp(enc, p, end, entry->name, entry->len) == 0)
+      return entry->ctype;
+  }
+
+  return -1;
+}
+/*
+ * Read a property name terminated by '}' starting at *src and translate it
+ * with property_name_to_ctype().
+ *
+ * When the syntax has ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS, a leading "Is"
+ * is skipped before the name is looked up.
+ *
+ * On success the ctype is returned and *src is moved past the '}'.
+ * Otherwise (unknown name, one of '(' ')' '{' '|' before '}', or end of
+ * input) the offending text is recorded with
+ * onig_scan_env_set_error_string() and ONIGERR_INVALID_CHAR_PROPERTY_NAME is
+ * returned; *src is not modified in that case.
+ */
+static int
+fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
+{
+ int ctype;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar *prev, *start, *p = *src;
+ PFETCH_READY;
+
+ /* 'IsXXXX' => 'XXXX' */
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
+ c = PPEEK;
+ if (c == 'I') {
+ PINC;
+ if (! PEND) {
+ c = PPEEK;
+ if (c == 's')
+ PINC;
+ else
+ PUNFETCH; /* 'I' not followed by 's': give the 'I' back */
+ }
+ }
+ }
+
+ start = prev = p; /* start: first character of the name; prev: start of the character fetched last */
+
+ while (!PEND) {
+ prev = p;
+ PFETCH(c);
+ if (c == '}') {
+ ctype = property_name_to_ctype(start, prev, enc); /* name is [start, prev), the '}' excluded */
+ if (ctype < 0) break;
+
+ *src = p;
+ return ctype;
+ }
+ else if (c == '(' || c == ')' || c == '{' || c == '|')
+ break;
+ }
+
+ onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
+ *src, prev);
+ return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+/*
+ * Build a character class node for the property whose name starts at *src.
+ *
+ * The name is consumed by fetch_char_property_to_ctype(); the class is filled
+ * in its positive form and marked negated when tok->u.prop.not is set.
+ *
+ * Returns 0 on success with the node in *np, a negative value from the name
+ * lookup, ONIGERR_MEMORY when the node cannot be allocated, or the non-zero
+ * result of add_ctype_to_cc().
+ */
+static int
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
+                    ScanEnv* env)
+{
+  int ctype, status;
+  Node* node;
+  CClassNode* cc;
+
+  ctype = fetch_char_property_to_ctype(src, end, env);
+  if (ctype < 0)
+    return ctype;
+
+  node = node_new_cclass();
+  *np = node;
+  CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+
+  cc = &(NCCLASS(node));
+  status = add_ctype_to_cc(cc, ctype, 0, env);
+  if (status != 0)
+    return status;
+
+  if (tok->u.prop.not != 0) {
+    CCLASS_SET_NOT(cc);
+  }
+  return 0;
+}
+
+/* Progress of parse_char_class() through the elements of one class. */
+enum CCSTATE {
+ CCS_VALUE, /* the last element was a single value or a class */
+ CCS_RANGE, /* a range operator was accepted; the range end is expected next */
+ CCS_COMPLETE, /* a range has just been completed */
+ CCS_START /* nothing read yet: start of the class, or right after && */
+};
+/* Kind of the element most recently handed to next_state_val() / next_state_class(). */
+enum CCVALTYPE {
+ CCV_SB, /* value that is recorded in the bitset */
+ CCV_CODE_POINT, /* value that is recorded in the code range buffer */
+ CCV_CLASS /* a whole class was added (set by next_state_class()) */
+};
+
+/*
+ * Advance the character class state machine after a whole class (character
+ * type, POSIX bracket or property) has been added to `cc`.
+ *
+ * A class cannot end a range, so state CCS_RANGE yields
+ * ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE.  A single value still pending in
+ * *vs is written to `cc` first.  Afterwards *state is CCS_VALUE and *type is
+ * CCV_CLASS.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+                 enum CCSTATE* state, ScanEnv* env)
+{
+  if (*state == CCS_RANGE)
+    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
+
+  if (*state == CCS_VALUE) {
+    switch (*type) {
+    case CCV_SB:
+      BITSET_SET_BIT(cc->bs, (int )(*vs));
+      break;
+
+    case CCV_CODE_POINT:
+      {
+        int status = add_code_range(&(cc->mbuf), env, *vs, *vs);
+        if (status < 0) return status;
+      }
+      break;
+
+    default: /* CCV_CLASS: nothing is pending */
+      break;
+    }
+  }
+
+  *type  = CCV_CLASS;
+  *state = CCS_VALUE;
+  return 0;
+}
+
+/*
+ * Advance the character class state machine with a new single value `v`.
+ *
+ *   cc        : class being built.
+ *   vs        : in: previously remembered value; out: `v`.
+ *   vs_israw  : out: receives v_israw.
+ *   intype    : kind of `v` (CCV_SB or CCV_CODE_POINT).
+ *   type      : in: kind of *vs; out: `intype`.
+ *   state     : in/out: current parser state.
+ *
+ * CCS_VALUE    : the remembered value is written to `cc`.
+ * CCS_RANGE    : the range *vs..v is written to `cc`; state becomes
+ *                CCS_COMPLETE.
+ * CCS_COMPLETE,
+ * CCS_START    : state becomes CCS_VALUE.
+ *
+ * Returns 0 on success or a negative error code; on error the out
+ * parameters are left unchanged.
+ */
+static int
+next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+               int* vs_israw, int v_israw,
+               enum CCVALTYPE intype, enum CCVALTYPE* type,
+               enum CCSTATE* state, ScanEnv* env)
+{
+  int r;
+
+  switch (*state) {
+  case CCS_VALUE:
+    if (*type == CCV_SB)
+      BITSET_SET_BIT(cc->bs, (int )(*vs));
+    else if (*type == CCV_CODE_POINT) {
+      r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+      if (r < 0) return r;
+    }
+    break;
+
+  case CCS_RANGE:
+    /* A range needs a real start value.  When the element before the range
+       operator was a class (e.g. [\w-a]), *vs was not set by that element
+       and may never have been set at all, so it must not be used as the
+       lower bound. */
+    if (*type == CCV_CLASS)
+      return ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+
+    if (intype == *type) {
+      if (intype == CCV_SB) {
+        if (*vs > 0xff || v > 0xff)
+          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+        if (*vs > v) {
+          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+            goto ccs_range_end;
+          else
+            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+        }
+        bitset_set_range(cc->bs, (int )*vs, (int )v);
+      }
+      else {
+        r = add_code_range(&(cc->mbuf), env, *vs, v);
+        if (r < 0) return r;
+      }
+    }
+    else {
+      /* Bounds of different kinds: record the part up to 0xff in the bitset
+         and the whole range in the code range buffer. */
+#if 0
+      if (intype == CCV_CODE_POINT && *type == CCV_SB) {
+#endif
+        if (*vs > v) {
+          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+            goto ccs_range_end;
+          else
+            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+        }
+        bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
+        r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
+        if (r < 0) return r;
+#if 0
+      }
+      else
+        return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
+#endif
+    }
+  ccs_range_end:
+    *state = CCS_COMPLETE;
+    break;
+
+  case CCS_COMPLETE:
+  case CCS_START:
+    *state = CCS_VALUE;
+    break;
+
+  default:
+    break;
+  }
+
+  /* Remember the new value; it is written out by the next call. */
+  *vs_israw = v_israw;
+  *vs       = v;
+  *type     = intype;
+  return 0;
+}
+
+/*
+ * Report whether code point `c` occurs in [from, end).
+ *
+ * When `ignore_escaped` is non-zero, a character that directly follows the
+ * escape character MC_ESC(enc) is skipped and never compared with `c`.
+ *
+ * Returns 1 when found, 0 otherwise.
+ */
+static int
+code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
+                 OnigEncoding enc)
+{
+  int in_esc;
+  OnigCodePoint code;
+  UChar* p = from;
+  PFETCH_READY;
+
+  in_esc = 0;
+  while (! PEND) {
+    PFETCH(code);
+
+    if (ignore_escaped && in_esc) {
+      /* `code` is the escaped character: it has to be consumed here.
+         Clearing the flag without fetching would leave it in the input to
+         be compared on the next iteration, which made ignore_escaped a
+         no-op. */
+      in_esc = 0;
+      continue;
+    }
+
+    if (code == c) return 1;
+    if (code == MC_ESC(enc)) in_esc = 1;
+  }
+  return 0;
+}
+/*
+ * Parse the contents of a character class and build a class node in *np.
+ *
+ * Handles a leading '^' (negation), single values and ranges (through
+ * next_state_val()), character types, POSIX brackets and properties (through
+ * next_state_class()), nested classes (recursive call, merged with
+ * or_cclass()) and the && operator (and_cclass()).
+ *
+ * Returns 0 on success, with *src moved to the position reached after the
+ * closing ']'.  On failure a non-zero code is returned; paths that leave
+ * through the err label free *np first.
+ */
+static int
+parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
+ ScanEnv* env)
+{
+ int r, neg, len, fetched, and_start;
+ OnigCodePoint v, vs;
+ UChar *p;
+ Node* node;
+ CClassNode *cc, *prev_cc;
+ CClassNode work_cc;
+
+ enum CCSTATE state;
+ enum CCVALTYPE val_type, in_type;
+ int val_israw, in_israw;
+
+ prev_cc = (CClassNode* )NULL; /* becomes non-NULL once the first && is seen */
+ *np = NULL_NODE;
+ r = fetch_token_in_cc(tok, src, end, env);
+ if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
+ neg = 1;
+ r = fetch_token_in_cc(tok, src, end, env);
+ }
+ else {
+ neg = 0;
+ }
+
+ if (r < 0) return r;
+ if (r == TK_CC_CLOSE) { /* ']' is the very first element */
+ if (! code_exist_check((OnigCodePoint )']',
+ *src, env->pattern_end, 1, env->enc))
+ return ONIGERR_EMPTY_CHAR_CLASS;
+
+ CC_ESC_WARN(env, (UChar* )"]");
+ r = tok->type = TK_CHAR; /* allow []...] */
+ }
+
+ *np = node = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+ cc = &(NCCLASS(node));
+
+ and_start = 0;
+ state = CCS_START;
+ p = *src;
+ while (r != TK_CC_CLOSE) { /* one pass per element until the closing ']' */
+ fetched = 0; /* set to 1 when a case body has already read the next token */
+ switch (r) {
+ case TK_CHAR:
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
+ if (len > 1) {
+ in_type = CCV_CODE_POINT;
+ }
+ else {
+ sb_char:
+ in_type = CCV_SB;
+ }
+ v = (OnigCodePoint )tok->u.c;
+ in_israw = 0;
+ goto val_entry2;
+ break;
+
+ case TK_RAW_BYTE:
+ /* tok->base != 0 : octal or hexadec. */
+ if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
+ UChar* psave = p; /* position to return to when too many bytes were collected */
+ int i, base = tok->base;
+
+ buf[0] = tok->u.c;
+ for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { /* collect following raw bytes written in the same base */
+ r = fetch_token_in_cc(tok, &p, end, env);
+ if (r < 0) goto err;
+ if (r != TK_RAW_BYTE || tok->base != base) {
+ fetched = 1;
+ break;
+ }
+ buf[i] = tok->u.c;
+ }
+
+ if (i < ONIGENC_MBC_MINLEN(env->enc)) {
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ goto err;
+ }
+
+ len = enc_len(env->enc, buf);
+ if (i < len) {
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ goto err;
+ }
+ else if (i > len) { /* fetch back */
+ p = psave;
+ for (i = 1; i < len; i++) {
+ r = fetch_token_in_cc(tok, &p, end, env);
+ }
+ fetched = 0;
+ }
+
+ if (i == 1) {
+ v = (OnigCodePoint )buf[0];
+ goto raw_single;
+ }
+ else {
+ v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
+ in_type = CCV_CODE_POINT;
+ }
+ }
+ else {
+ v = (OnigCodePoint )tok->u.c;
+ raw_single:
+ in_type = CCV_SB;
+ }
+ in_israw = 1;
+ goto val_entry2;
+ break;
+
+ case TK_CODE_POINT:
+ v = tok->u.code;
+ in_israw = 1;
+ val_entry: /* entry for values whose kind still has to be derived from v */
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
+ if (len < 0) {
+ r = len;
+ goto err;
+ }
+ in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
+ val_entry2: /* entry for values whose in_type is already set */
+ r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
+ &state, env);
+ if (r != 0) goto err;
+ break;
+
+ case TK_POSIX_BRACKET_OPEN:
+ r = parse_posix_bracket(cc, &p, end, env);
+ if (r < 0) goto err;
+ if (r == 1) { /* is not POSIX bracket */
+ CC_ESC_WARN(env, (UChar* )"[");
+ p = tok->backp;
+ v = (OnigCodePoint )tok->u.c;
+ in_israw = 0;
+ goto val_entry;
+ }
+ goto next_class;
+ break;
+
+ case TK_CHAR_TYPE:
+ {
+ int ctype, not;
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ r = add_ctype_to_cc(cc, ctype, not, env);
+ if (r != 0) return r; /* NOTE(review): leaves without passing through err:, unlike most cases */
+ }
+
+ next_class:
+ r = next_state_class(cc, &vs, &val_type, &state, env);
+ if (r != 0) goto err;
+ break;
+
+ case TK_CHAR_PROPERTY:
+ {
+ int ctype;
+
+ ctype = fetch_char_property_to_ctype(&p, end, env);
+ if (ctype < 0) return ctype; /* NOTE(review): leaves without passing through err: */
+ r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
+ if (r != 0) return r; /* NOTE(review): leaves without passing through err: */
+ goto next_class;
+ }
+ break;
+
+ case TK_CC_RANGE:
+ if (state == CCS_VALUE) {
+ r = fetch_token_in_cc(tok, &p, end, env);
+ if (r < 0) goto err;
+ fetched = 1;
+ if (r == TK_CC_CLOSE) { /* allow [x-] */
+ range_end_val:
+ v = (OnigCodePoint )'-';
+ in_israw = 0;
+ goto val_entry;
+ }
+ else if (r == TK_CC_AND) {
+ CC_ESC_WARN(env, (UChar* )"-");
+ goto range_end_val;
+ }
+ state = CCS_RANGE; /* NOTE(review): val_type may be CCV_CLASS here (class followed by '-'); confirm vs is a valid range start then */
+ }
+ else if (state == CCS_START) {
+ /* [-xa] is allowed */
+ v = (OnigCodePoint )tok->u.c;
+ in_israw = 0;
+
+ r = fetch_token_in_cc(tok, &p, end, env);
+ if (r < 0) goto err;
+ fetched = 1;
+ /* [--x] or [a&&-x] is warned. */
+ if (r == TK_CC_RANGE || and_start != 0)
+ CC_ESC_WARN(env, (UChar* )"-");
+
+ goto val_entry;
+ }
+ else if (state == CCS_RANGE) {
+ CC_ESC_WARN(env, (UChar* )"-");
+ goto sb_char; /* [!--x] is allowed */
+ }
+ else { /* CCS_COMPLETE */
+ r = fetch_token_in_cc(tok, &p, end, env);
+ if (r < 0) goto err;
+ fetched = 1;
+ if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
+ else if (r == TK_CC_AND) {
+ CC_ESC_WARN(env, (UChar* )"-");
+ goto range_end_val;
+ }
+
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
+ CC_ESC_WARN(env, (UChar* )"-");
+ goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
+ }
+ r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+ goto err;
+ }
+ break;
+
+ case TK_CC_CC_OPEN: /* [ */
+ {
+ Node *anode;
+ CClassNode* acc;
+
+ r = parse_char_class(&anode, tok, &p, end, env);
+ if (r != 0) goto cc_open_err;
+ acc = &(NCCLASS(anode));
+ r = or_cclass(cc, acc, env->enc);
+
+ onig_node_free(anode); /* the nested node is only needed for the merge */
+ cc_open_err:
+ if (r != 0) goto err;
+ }
+ break;
+
+ case TK_CC_AND: /* && */
+ {
+ if (state == CCS_VALUE) { /* write out the value that is still pending */
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+ &val_type, &state, env);
+ if (r != 0) goto err;
+ }
+ /* initialize local variables */
+ and_start = 1;
+ state = CCS_START;
+
+ if (IS_NOT_NULL(prev_cc)) {
+ r = and_cclass(prev_cc, cc, env->enc);
+ if (r != 0) goto err;
+ bbuf_free(cc->mbuf);
+ }
+ else {
+ prev_cc = cc; /* first &&: the node's class becomes the left operand */
+ cc = &work_cc; /* following elements are collected in the scratch class */
+ }
+ initialize_cclass(cc);
+ }
+ break;
+
+ case TK_EOT:
+ r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
+ goto err;
+ break;
+ default:
+ r = ONIGERR_PARSER_BUG;
+ goto err;
+ break;
+ }
+
+ if (fetched)
+ r = tok->type;
+ else {
+ r = fetch_token_in_cc(tok, &p, end, env);
+ if (r < 0) goto err;
+ }
+ }
+
+ if (state == CCS_VALUE) { /* write out the last pending value */
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+ &val_type, &state, env);
+ if (r != 0) goto err;
+ }
+
+ if (IS_NOT_NULL(prev_cc)) { /* apply the last && operand */
+ r = and_cclass(prev_cc, cc, env->enc);
+ if (r != 0) goto err;
+ bbuf_free(cc->mbuf);
+ cc = prev_cc;
+ }
+
+ if (neg != 0)
+ CCLASS_SET_NOT(cc);
+ else
+ CCLASS_CLEAR_NOT(cc);
+ if (IS_CCLASS_NOT(cc) &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
+ int is_empty;
+
+ is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
+ if (is_empty != 0)
+ BITSET_IS_EMPTY(cc->bs, is_empty);
+
+ if (is_empty == 0) { /* adding newline to the negated class's own set keeps newline from matching */
+#define NEWLINE_CODE 0x0a
+
+ if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
+ if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
+ BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
+ else
+ add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
+ }
+ }
+ }
+ *src = p;
+ return 0;
+
+ err:
+ if (cc != &(NCCLASS(*np))) /* cc is the scratch class: only its buffer has to be released */
+ bbuf_free(cc->mbuf);
+ onig_node_free(*np);
+ return r;
+}
+/* Forward declaration: parse_effect() and parse_exp() call parse_subexp() for nested expressions. */
+static int parse_subexp(Node** top, OnigToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env);
+
+/*
+ * Parse what follows an opening parenthesis.
+ *
+ * With ONIG_SYN_OP2_QMARK_GROUP_EFFECT and a leading '?', the extended forms
+ * are handled: (?:...), (?=...), (?!...), (?>...), (?<=...), (?<!...),
+ * (?<name>...), (?@...), (?@<name>...) and option groups such as (?i) or
+ * (?i-m:...).  Otherwise a capturing group is created, or a plain group when
+ * ONIG_OPTION_DONT_CAPTURE_GROUP is on.
+ *
+ * Returns
+ *    1  plain group; *np holds the parsed sub-expression,
+ *    2  option-only form such as (?i); *np is an option node without target,
+ *    0  any other form; *np holds the new node with its target attached,
+ *   <0  error code.
+ * *src is advanced on the non-negative returns.
+ */
+static int
+parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
+{
+ int r, num;
+ int list_capture;
+ Node *target;
+ OnigOptionType option;
+ OnigEncoding enc = env->enc;
+ OnigCodePoint c;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ *np = NULL;
+ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+
+ option = env->option;
+ if (PPEEK_IS('?') &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ PINC;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+ PFETCH(c);
+ switch (c) {
+ case ':': /* (?:...) grouping only */
+ group:
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(np, tok, term, &p, end, env);
+ if (r < 0) return r;
+ *src = p;
+ return 1; /* group */
+ break;
+
+ case '=':
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ);
+ break;
+ case '!': /* preceding read */
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
+ break;
+ case '>': /* (?>...) stop backtrack */
+ *np = node_new_effect(EFFECT_STOP_BACKTRACK);
+ break;
+
+ case '<': /* look behind (?<=...), (?<!...) */
+ /* "(?<" at the very end of the pattern: nothing is left to fetch. */
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c == '=')
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
+ else if (c == '!')
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
+#ifdef USE_NAMED_GROUP
+ else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ UChar *name;
+ UChar *name_end;
+
+ PUNFETCH;
+ list_capture = 0;
+
+ named_group:
+ name = p;
+ r = fetch_name(&p, end, &name_end, env, 0);
+ if (r < 0) return r;
+
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) return num;
+ if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+ r = name_add(env->reg, name, name_end, num, env);
+ if (r != 0) return r;
+ *np = node_new_effect_memory(env->option, 1);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).regnum = num;
+ if (list_capture != 0)
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ env->num_named++;
+ }
+#endif
+ else
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+
+ case '@':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
+#ifdef USE_NAMED_GROUP
+ /* Peek for '<' only when a character is left; "(?@" at the end of the
+ pattern falls through to the unnamed form below. */
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP) &&
+ ! PEND) {
+ PFETCH(c);
+ if (c == '<') {
+ list_capture = 1;
+ goto named_group; /* (?@<name>...) */
+ }
+ PUNFETCH;
+ }
+#endif
+ *np = node_new_effect_memory(env->option, 0);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) {
+ onig_node_free(*np);
+ return num;
+ }
+ else if (num >= BIT_STATUS_BITS_NUM) {
+ onig_node_free(*np);
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+ }
+ NEFFECT(*np).regnum = num;
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ }
+ else {
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+ break;
+
+#ifdef USE_POSIXLINE_OPTION
+ case 'p':
+#endif
+ case '-': case 'i': case 'm': case 's': case 'x':
+ {
+ int neg = 0;
+
+ /* Consume option letters until ')' (option only) or ':' (option
+ group); a '-' makes the letters after it switch options off. */
+ while (1) {
+ switch (c) {
+ case ':':
+ case ')':
+ break;
+
+ case '-': neg = 1; break;
+ case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
+ case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
+ case 's':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+ ONOFF(option, ONIG_OPTION_MULTILINE, neg);
+ }
+ else
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+
+ case 'm':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+ ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+ }
+ else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+ ONOFF(option, ONIG_OPTION_MULTILINE, neg);
+ }
+ else
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+#ifdef USE_POSIXLINE_OPTION
+ case 'p':
+ ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
+ break;
+#endif
+ default:
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+
+ if (c == ')') {
+ *np = node_new_option(option);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ *src = p;
+ return 2; /* option only */
+ }
+ else if (c == ':') {
+ OnigOptionType prev = env->option;
+
+ /* The body is parsed with the new options; the previous ones are
+ restored whether or not parsing succeeds. */
+ env->option = option;
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&target, tok, term, &p, end, env);
+ env->option = prev;
+ if (r < 0) return r;
+ *np = node_new_option(option);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ NEFFECT(*np).target = target;
+ *src = p;
+ return 0;
+ }
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ }
+ }
+ break;
+
+ default:
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ }
+ }
+ else {
+ if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
+ goto group;
+
+ *np = node_new_effect_memory(env->option, 0);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ num = scan_env_add_mem_entry(env);
+ if (num < 0) return num;
+ NEFFECT(*np).regnum = num;
+ }
+
+ /* Common tail: parse the group body and attach it to the node made above. */
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ r = fetch_token(tok, &p, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&target, tok, term, &p, end, env);
+ if (r < 0) return r;
+
+ if (NTYPE(*np) == N_ANCHOR)
+ NANCHOR(*np).target = target;
+ else {
+ NEFFECT(*np).target = target;
+ if (NEFFECT(*np).type == EFFECT_MEMORY) {
+ /* Don't move this to previous of parse_subexp() */
+ r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
+ if (r != 0) return r;
+ }
+ }
+
+ *src = p;
+ return 0;
+}
+/* Operator spellings used in the nested repeat warning; set_quantifier() indexes this with the value of popular_quantifier_num(). */
+static const char* PopularQStr[] = {
+ "?", "*", "+", "??", "*?", "+?"
+};
+/* Replacement spellings used in the same warning; set_quantifier() indexes this with an entry of ReduceTypeTable. */
+static const char* ReduceQStr[] = {
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+};
+/*
+ * Attach `target` to the quantifier node `qnode`.
+ *
+ * Returns
+ *   1  the quantifier is {1,1}; nothing is attached,
+ *   2  `target` is a string (and `group` is 0) whose last character was
+ *      split off; only that character became the quantifier's target,
+ *   0  otherwise: either `target` was attached as it is, or a nested
+ *      quantifier was handed to onig_reduce_nested_quantifier().
+ *
+ * With USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR, a warning is issued
+ * through onig_verb_warn for nested operator-style quantifiers when the
+ * syntax asks for it.
+ */
+static int
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
+{
+ QuantifierNode* qn;
+
+ qn = &(NQUANTIFIER(qnode));
+ if (qn->lower == 1 && qn->upper == 1) {
+ return 1;
+ }
+
+ switch (NTYPE(target)) {
+ case N_STRING:
+ if (! group) {
+ StrNode* sn = &(NSTRING(target));
+ if (str_node_can_be_split(sn, env->enc)) {
+ Node* n = str_node_split_last_char(sn, env->enc);
+ if (IS_NOT_NULL(n)) { /* on NULL the whole string is used as target below */
+ qn->target = n;
+ return 2;
+ }
+ }
+ }
+ break;
+
+ case N_QUANTIFIER:
+ { /* check redundant double repeat. */
+ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+ QuantifierNode* qnt = &(NQUANTIFIER(target));
+ int nestq_num = popular_quantifier_num(qn);
+ int targetq_num = popular_quantifier_num(qnt);
+
+#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+ if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
+ UChar buf[WARN_BUFSIZE];
+
+ switch(ReduceTypeTable[targetq_num][nestq_num]) {
+ case RQ_ASIS:
+ break;
+
+ case RQ_DEL:
+ if (onig_verb_warn != onig_null_warn) { /* skip formatting when no warning handler is installed */
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"redundant nested repeat operator");
+ (*onig_verb_warn)((char* )buf);
+ }
+ goto warn_exit;
+ break;
+
+ default:
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
+ PopularQStr[targetq_num], PopularQStr[nestq_num],
+ ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
+ (*onig_verb_warn)((char* )buf);
+ }
+ goto warn_exit;
+ break;
+ }
+ }
+
+ warn_exit:
+#endif
+ if (targetq_num >= 0) {
+ if (nestq_num >= 0) {
+ onig_reduce_nested_quantifier(qnode, target);
+ goto q_exit; /* qn->target is not assigned on this path */
+ }
+ else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ qn->target = target;
+ q_exit:
+ return 0;
+}
+
+#ifdef USE_SHARED_CCLASS_TABLE
+
+#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
+
+/* for ctype node hash table */
+/* Key of OnigTypeCClassTable: one entry per (encoding, negation flag, character type) combination. */
+typedef struct {
+ OnigEncoding enc; /* encoding the class was built for */
+ int not; /* negation flag as produced by parse_ctype_to_enc_ctype() */
+ int type; /* ONIGENC_CTYPE_* value */
+} type_cclass_key;
+
+/*
+ * Key comparison callback for the shared class table.
+ * Returns 0 when both keys agree in type, encoding and negation flag,
+ * 1 otherwise.
+ */
+static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
+{
+  int equal = (x->type == y->type) &&
+              (x->enc  == y->enc)  &&
+              (x->not  == y->not);
+
+  return equal ? 0 : 1;
+}
+
+/*
+ * Hash callback for the shared class table: mixes the bytes of key->enc and
+ * key->type, then adds key->not.
+ *
+ * The arithmetic is done on unsigned int, where wrap-around is well defined;
+ * the same computation on a signed int overflows.  Only equality of the
+ * results for equal keys matters to the table, so the exact values may
+ * differ from the signed computation.
+ */
+static int type_cclass_hash(type_cclass_key* key)
+{
+  unsigned int i;
+  unsigned int val;
+  const unsigned char *p;
+
+  val = 0;
+
+  p = (const unsigned char* )&(key->enc);
+  for (i = 0; i < sizeof(key->enc); i++) {
+    val = val * 997u + (unsigned int )*p++;
+  }
+
+  p = (const unsigned char* )(&key->type);
+  for (i = 0; i < sizeof(key->type); i++) {
+    val = val * 997u + (unsigned int )*p++;
+  }
+
+  val += (unsigned int )key->not;
+  return (int )(val + (val >> 5));
+}
+/* Comparison and hash callbacks handed to onig_st_init_table_with_size() when OnigTypeCClassTable is created. */
+static struct st_hash_type type_type_cclass_hash = {
+ type_cclass_cmp,
+ type_cclass_hash,
+};
+/* Table of shared character class nodes keyed by type_cclass_key; NULL until parse_exp() creates it and again after onig_free_shared_cclass_table(). */
+static st_table* OnigTypeCClassTable;
+
+
+/*
+ * onig_st_foreach() callback used by onig_free_shared_cclass_table():
+ * releases one shared class node together with its key.
+ *
+ * Always returns ST_DELETE.  `arg` is not used.
+ */
+static int
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
+{
+  if (IS_NOT_NULL(node)) {
+    CClassNode* cc = &(NCCLASS(node));
+    /* Release the range buffer the same way the rest of this file releases
+       cc->mbuf; a plain xfree() of the BBuf frees only its header. */
+    bbuf_free(cc->mbuf);
+    xfree(node);
+  }
+
+  if (IS_NOT_NULL(key)) xfree(key);
+  return ST_DELETE;
+}
+
+/*
+ * Release every shared character class node and the table that holds them.
+ * Does nothing when the table was never created.  Always returns 0.
+ */
+extern int
+onig_free_shared_cclass_table(void)
+{
+  if (IS_NULL(OnigTypeCClassTable))
+    return 0;
+
+  onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+  onig_st_free_table(OnigTypeCClassTable);
+  OnigTypeCClassTable = NULL;
+  return 0;
+}
+
+#endif /* USE_SHARED_CCLASS_TABLE */
+
+
+static int
+parse_exp(Node** np, OnigToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env)
+{
+ int r, len, group = 0;
+ Node* qn;
+ Node** targetp;
+
+ *np = NULL;
+ if (tok->type == term)
+ goto end_of_token;
+
+ switch (tok->type) {
+ case TK_ALT:
+ case TK_EOT:
+ end_of_token:
+ *np = node_new_empty();
+ return tok->type;
+ break;
+
+ case TK_SUBEXP_OPEN:
+ r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+ if (r < 0) return r;
+ if (r == 1) group = 1;
+ else if (r == 2) { /* option only */
+ Node* target;
+ OnigOptionType prev = env->option;
+
+ env->option = NEFFECT(*np).option;
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+ r = parse_subexp(&target, tok, term, src, end, env);
+ env->option = prev;
+ if (r < 0) return r;
+ NEFFECT(*np).target = target;
+ return tok->type;
+ }
+ break;
+
+ case TK_SUBEXP_CLOSE:
+ if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
+ return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
+
+ if (tok->escaped) goto tk_raw_byte;
+ else goto tk_byte;
+ break;
+
+ case TK_STRING:
+ tk_byte:
+ {
+ *np = node_new_str(tok->backp, *src);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+
+ while (1) {
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+ if (r != TK_STRING) break;
+
+ r = onig_node_str_cat(*np, tok->backp, *src);
+ if (r < 0) return r;
+ }
+
+ string_end:
+ targetp = np;
+ goto repeat;
+ }
+ break;
+
+ case TK_RAW_BYTE:
+ tk_raw_byte:
+ {
+ *np = node_new_str_char((UChar )tok->u.c);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ len = 1;
+ while (1) {
+ if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+ if (len == enc_len(env->enc, NSTRING(*np).s)) {
+ r = fetch_token(tok, src, end, env);
+ goto string_end;
+ }
+ }
+
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+ if (r != TK_RAW_BYTE) {
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+ int rem;
+ if (len < ONIGENC_MBC_MINLEN(env->enc)) {
+ rem = ONIGENC_MBC_MINLEN(env->enc) - len;
+ (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
+ if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
+ goto string_end;
+ }
+ }
+#endif
+ return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ }
+
+ r = node_str_cat_char(*np, (UChar )tok->u.c);
+ if (r < 0) return r;
+
+ len++;
+ }
+ }
+ break;
+
+ case TK_CODE_POINT:
+ {
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+ if (num < 0) return num;
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+ *np = node_new_str_raw(buf, buf + num);
+#else
+ *np = node_new_str(buf, buf + num);
+#endif
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ }
+ break;
+
+ case TK_QUOTE_OPEN:
+ {
+ OnigCodePoint end_op[2];
+ UChar *qstart, *qend, *nextp;
+
+ end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
+ end_op[1] = (OnigCodePoint )'E';
+ qstart = *src;
+ qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
+ if (IS_NULL(qend)) {
+ nextp = qend = end;
+ }
+ *np = node_new_str(qstart, qend);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ *src = nextp;
+ }
+ break;
+
+ case TK_CHAR_TYPE:
+ {
+ switch (tok->u.subtype) {
+ case CTYPE_WORD:
+ case CTYPE_NOT_WORD:
+ *np = node_new_ctype(tok->u.subtype);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ break;
+
+ case CTYPE_WHITE_SPACE:
+ case CTYPE_NOT_WHITE_SPACE:
+ case CTYPE_DIGIT:
+ case CTYPE_NOT_DIGIT:
+ case CTYPE_XDIGIT:
+ case CTYPE_NOT_XDIGIT:
+ {
+ CClassNode* cc;
+ int ctype, not;
+
+#ifdef USE_SHARED_CCLASS_TABLE
+ const OnigCodePoint *sbr, *mbr;
+
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+ if (r == 0 &&
+ ONIGENC_CODE_RANGE_NUM(mbr)
+ >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
+ type_cclass_key key;
+ type_cclass_key* new_key;
+
+ key.enc = env->enc;
+ key.not = not;
+ key.type = ctype;
+
+ THREAD_ATOMIC_START;
+
+ if (IS_NULL(OnigTypeCClassTable)) {
+ OnigTypeCClassTable
+ = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
+ if (IS_NULL(OnigTypeCClassTable)) {
+ THREAD_ATOMIC_END;
+ return ONIGERR_MEMORY;
+ }
+ }
+ else {
+ if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
+ (st_data_t* )np)) {
+ THREAD_ATOMIC_END;
+ break;
+ }
+ }
+
+ *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+ if (IS_NULL(*np)) {
+ THREAD_ATOMIC_END;
+ return ONIGERR_MEMORY;
+ }
+
+ CCLASS_SET_SHARE(&(NCCLASS(*np)));
+ new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+ xmemcpy(new_key, &key, sizeof(type_cclass_key));
+ onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
+ (st_data_t )*np);
+
+ THREAD_ATOMIC_END;
+ }
+ else {
+#endif
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ cc = &(NCCLASS(*np));
+ add_ctype_to_cc(cc, ctype, 0, env);
+ if (not != 0) CCLASS_SET_NOT(cc);
+#ifdef USE_SHARED_CCLASS_TABLE
+ }
+#endif
+ }
+ break;
+
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+ }
+ break;
+
+ case TK_CHAR_PROPERTY:
+ r = parse_char_property(np, tok, src, end, env);
+ if (r != 0) return r;
+ break;
+
+ case TK_CC_OPEN:
+ {
+ CClassNode* cc;
+
+ r = parse_char_class(np, tok, src, end, env);
+ if (r != 0) return r;
+
+ cc = &(NCCLASS(*np));
+
+ if (IS_IGNORECASE(env->option)) {
+ int i, n, in_cc;
+ const OnigPairAmbigCodes* ccs;
+ BitSetRef bs = cc->bs;
+ OnigAmbigType amb;
+
+ for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
+ if ((amb & env->ambig_flag) == 0) continue;
+
+ n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
+ for (i = 0; i < n; i++) {
+ in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
+
+ if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
+ (in_cc == 0 && IS_CCLASS_NOT(cc))) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
+ ccs[i].from >= SINGLE_BYTE_SIZE) {
+ /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
+ add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
+ }
+ else {
+ if (BITSET_AT(bs, ccs[i].from)) {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ BITSET_SET_BIT(bs, ccs[i].to);
+ }
+ if (BITSET_AT(bs, ccs[i].to)) {
+ BITSET_SET_BIT(bs, ccs[i].from);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ break;
+
+ case TK_ANYCHAR:
+ *np = node_new_anychar();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ break;
+
+ case TK_ANYCHAR_ANYTIME:
+ *np = node_new_anychar();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+ NQUANTIFIER(qn).target = *np;
+ *np = qn;
+ break;
+
+ case TK_BACKREF:
+ len = tok->u.backref.num;
+ *np = node_new_backref(len,
+ (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+ tok->u.backref.by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+ tok->u.backref.exist_level,
+ tok->u.backref.level,
+#endif
+ env);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case TK_CALL:
+ *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ env->num_call++;
+ break;
+#endif
+
+ case TK_ANCHOR:
+ *np = onig_node_new_anchor(tok->u.anchor);
+ break;
+
+ case TK_OP_REPEAT:
+ case TK_INTERVAL:
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
+ else
+ *np = node_new_empty();
+ }
+ else {
+ goto tk_byte;
+ }
+ break;
+
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+
+ {
+ targetp = np;
+
+ re_entry:
+ r = fetch_token(tok, src, end, env);
+ if (r < 0) return r;
+
+ repeat:
+ if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+ if (is_invalid_quantifier_target(*targetp))
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+
+ qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
+ (r == TK_INTERVAL ? 1 : 0));
+ CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
+ NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
+ r = set_quantifier(qn, *targetp, group, env);
+ if (r < 0) return r;
+
+ if (tok->u.repeat.possessive != 0) {
+ Node* en;
+ en = node_new_effect(EFFECT_STOP_BACKTRACK);
+ CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
+ NEFFECT(en).target = qn;
+ qn = en;
+ }
+
+ if (r == 0) {
+ *targetp = qn;
+ }
+ else if (r == 2) { /* split case: /abc+/ */
+ Node *tmp;
+
+ *targetp = node_new_list(*targetp, NULL);
+ CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
+ tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
+ CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
+ targetp = &(NCONS(tmp).left);
+ }
+ goto re_entry;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Parse one branch: a run of expressions that ends at an alternation bar,
+ * at the terminator token `term`, or at the end of the pattern.
+ *
+ * On success *top receives the parsed tree (a single node, or an N_LIST
+ * chain when the branch holds several expressions) and the type of the
+ * token that ended the branch is returned.  A negative return value is an
+ * ONIGERR_* code; *top then holds NULL or the partially built list.
+ */
+static int
+parse_branch(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_exp(&node, tok, term, src, end, env);
+  if (r < 0) return r;
+
+  if (r == TK_EOT || r == term || r == TK_ALT) {
+    *top = node;
+  }
+  else {
+    *top = node_new_list(node, NULL);
+    if (*top == NULL) {
+      /* node is not linked into any tree yet, so it must be released here */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCONS(*top).right);
+    while (r != TK_EOT && r != term && r != TK_ALT) {
+      r = parse_exp(&node, tok, term, src, end, env);
+      if (r < 0) return r;
+
+      if (NTYPE(node) == N_LIST) {
+        /* splice the returned list in and continue appending at its tail */
+        *headp = node;
+        while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right;
+        headp = &(NCONS(node).right);
+      }
+      else {
+        *headp = node_new_list(node, NULL);
+        if (*headp == NULL) {
+          /* the list cell could not be allocated: node is still orphaned */
+          onig_node_free(node);
+          return ONIGERR_MEMORY;
+        }
+        headp = &(NCONS(*headp).right);
+      }
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Parse a (sub)expression: one or more branches separated by TK_ALT and
+ * closed by the token `term` (TK_EOT or TK_SUBEXP_CLOSE).
+ *
+ * On success *top receives a single branch, or an alternation chain built
+ * with node_new_alt(), and the terminating token type is returned.  A
+ * negative return value is an ONIGERR_* code.
+ */
+static int
+parse_subexp(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_branch(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == term) {
+    *top = node;
+  }
+  else if (r == TK_ALT) {
+    *top = node_new_alt(node, NULL);
+    if (*top == NULL) {
+      /* the branch was never attached to a tree: free it before bailing out */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCONS(*top).right);
+    while (r == TK_ALT) {
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_branch(&node, tok, term, src, end, env);
+      if (r < 0) {
+        /* same cleanup as for the first branch above */
+        onig_node_free(node);
+        return r;
+      }
+
+      *headp = node_new_alt(node, NULL);
+      if (*headp == NULL) {
+        onig_node_free(node);
+        return ONIGERR_MEMORY;
+      }
+      headp = &(NCONS(*headp).right);
+    }
+
+    if (tok->type != term)
+      goto err;
+  }
+  else {
+    /* unexpected terminator: the lone branch is not stored anywhere */
+    onig_node_free(node);
+  err:
+    if (term == TK_SUBEXP_CLOSE)
+      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+    else
+      return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/*
+ * Top of the parser: fetch the first token, then parse the whole pattern
+ * as a sub-expression terminated by TK_EOT.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+{
+  OnigToken tok;
+  int status;
+
+  status = fetch_token(&tok, src, end, env);
+  if (status >= 0)
+    status = parse_subexp(top, &tok, TK_EOT, src, end, env);
+
+  /* non-negative results are collapsed to plain success */
+  return (status < 0) ? status : 0;
+}
+
+/*
+ * Parse the pattern [pattern, end) into a node tree stored in *root.
+ *
+ * The scan environment is cleared and then seeded from `reg`; after parsing,
+ * env->num_mem is copied back into reg->num_mem whether or not parsing
+ * succeeded.  Returns the result of parse_regexp().
+ */
+extern int
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg,
+                     ScanEnv* env)
+{
+  int status;
+  UChar* cursor;
+
+#ifdef USE_NAMED_GROUP
+  names_clear(reg);
+#endif
+
+  scan_env_clear(env);
+
+  /* settings inherited from the regex object */
+  env->reg        = reg;
+  env->enc        = reg->enc;
+  env->syntax     = reg->syntax;
+  env->option     = reg->options;
+  env->ambig_flag = reg->ambig_flag;
+
+  /* pattern bounds */
+  env->pattern     = (UChar* )pattern;
+  env->pattern_end = (UChar* )end;
+
+  cursor = (UChar* )pattern;
+  *root  = NULL;
+  status = parse_regexp(root, &cursor, (UChar* )end, env);
+
+  reg->num_mem = env->num_mem;
+  return status;
+}
+
+/*
+ * Record the span [arg, arg_end) in the scan environment as the text
+ * associated with an error.  `ecode` is accepted but not used here.
+ */
+extern void
+onig_scan_env_set_error_string(ScanEnv* env, int ecode,
+                               UChar* arg, UChar* arg_end)
+{
+  env->error_end = arg_end;
+  env->error     = arg;
+}
diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h
new file mode 100644
index 0000000..b25618a
--- /dev/null
+++ b/ext/mbstring/oniguruma/regparse.h
@@ -0,0 +1,328 @@
+#ifndef REGPARSE_H
+#define REGPARSE_H
+/**********************************************************************
+ regparse.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+/* node type: one distinct bit per kind; read back from a node with NTYPE() */
+#define N_STRING (1<< 0)
+#define N_CCLASS (1<< 1)
+#define N_CTYPE (1<< 2)
+#define N_ANYCHAR (1<< 3)
+#define N_BACKREF (1<< 4)
+#define N_QUANTIFIER (1<< 5)
+#define N_EFFECT (1<< 6)
+#define N_ANCHOR (1<< 7)
+#define N_LIST (1<< 8)
+#define N_ALT (1<< 9)
+#define N_CALL (1<<10)
+
+#define IS_NODE_TYPE_SIMPLE(type) \
+ (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0) /* non-zero for string, cclass, ctype, anychar and backref types */
+
+#define NTYPE(node) ((node)->type)
+#define NCONS(node) ((node)->u.cons)
+#define NSTRING(node) ((node)->u.str)
+#define NCCLASS(node) ((node)->u.cclass)
+#define NCTYPE(node) ((node)->u.ctype)
+#define NQUANTIFIER(node) ((node)->u.quantifier)
+#define NANCHOR(node) ((node)->u.anchor)
+#define NBACKREF(node) ((node)->u.backref)
+#define NEFFECT(node) ((node)->u.effect)
+#define NCALL(node) ((node)->u.call)
+
+#define CTYPE_WORD (1<<0)
+#define CTYPE_NOT_WORD (1<<1)
+#define CTYPE_WHITE_SPACE (1<<2)
+#define CTYPE_NOT_WHITE_SPACE (1<<3)
+#define CTYPE_DIGIT (1<<4)
+#define CTYPE_NOT_DIGIT (1<<5)
+#define CTYPE_XDIGIT (1<<6)
+#define CTYPE_NOT_XDIGIT (1<<7)
+
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
+#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
+
+#define EFFECT_MEMORY (1<<0)
+#define EFFECT_OPTION (1<<1)
+#define EFFECT_STOP_BACKTRACK (1<<2)
+
+#define NODE_STR_MARGIN 16
+#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_BACKREFS_SIZE 6
+
+#define NSTR_RAW (1<<0) /* by backslashed number */
+#define NSTR_AMBIG (1<<1)
+#define NSTR_AMBIG_REDUCE (1<<2)
+
+#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
+#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
+#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+#define NSTRING_IS_AMBIG_REDUCE(node) \
+ (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
+
+/* Yields the back-reference array of a BackrefNode: back_dynamic when it is
+ * non-NULL, otherwise the inline back_static.  Expands to a plain expression
+ * (no trailing semicolon) so it can also be used inside larger expressions. */
+#define BACKREFS_P(br) \
+ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
+
+#define NQ_TARGET_ISNOT_EMPTY 0
+#define NQ_TARGET_IS_EMPTY 1
+#define NQ_TARGET_IS_EMPTY_MEM 2
+#define NQ_TARGET_IS_EMPTY_REC 3
+
+
+typedef struct { /* string payload of a Node (Node.u.str, accessed via NSTRING()) */
+ UChar* s; /* start of the string data */
+ UChar* end; /* end of the string data; NSTRING_LEN() computes end - s */
+ unsigned int flag; /* NSTR_* bits (RAW / AMBIG / AMBIG_REDUCE) */
+ int capa; /* (allocated size - 1) or 0: use buf[] */
+ UChar buf[NODE_STR_BUF_SIZE]; /* inline storage, used while capa is 0 */
+} StrNode;
+
+/* move to regint.h */
+#if 0
+typedef struct {
+ int flags;
+ BitSet bs;
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+#endif
+
+typedef struct { /* repeat-operator payload of a Node (Node.u.quantifier) */
+ int state; /* NST_* status bits, e.g. NST_IN_REPEAT / NST_BY_NUMBER */
+ struct _Node* target; /* node the quantifier applies to */
+ int lower; /* presumably the minimum repeat count -- confirm in node_new_quantifier() */
+ int upper; /* presumably the maximum repeat count (REPEAT_INFINITE for unbounded) -- confirm */
+ int greedy; /* copied from the repeat token's greedy flag by the parser */
+ int target_empty_info; /* presumably one of the NQ_TARGET_* values -- confirm */
+ struct _Node* head_exact;
+ struct _Node* next_head_exact;
+ int is_refered; /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
+#endif
+} QuantifierNode;
+
+/* status bits */
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
+#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_NEST_LEVEL (1<<13)
+#define NST_BY_NUMBER (1<<14) /* {n,m} */
+
+#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
+#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
+
+#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
+#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
+#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
+#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
+#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
+#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
+#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
+#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
+#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
+ (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
+
+#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
+#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
+#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
+
+typedef struct { /* payload of an effect node (Node.u.effect) */
+ int state; /* NST_* status bits, updated with SET_/CLEAR_EFFECT_STATUS() */
+ int type; /* presumably one of EFFECT_MEMORY / EFFECT_OPTION / EFFECT_STOP_BACKTRACK -- confirm */
+ int regnum;
+ OnigOptionType option;
+ struct _Node* target; /* node wrapped by this effect */
+ AbsAddrType call_addr;
+ /* for multiple call reference */
+ OnigDistance min_len; /* min length (byte) */
+ OnigDistance max_len; /* max length (byte) */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
+} EffectNode;
+
+#define CALLNODE_REFNUM_UNDEF (-1) /* parenthesized so the sign cannot bind to neighbouring tokens */
+
+#ifdef USE_SUBEXP_CALL
+
+typedef struct { /* one element of UnsetAddrList.us */
+ int offset; /* NOTE(review): presumably a position to patch once the target address is known -- confirm */
+ struct _Node* target;
+} UnsetAddr;
+
+typedef struct { /* array of UnsetAddr entries */
+ int num; /* presumably the number of entries in use -- confirm */
+ int alloc; /* presumably the allocated capacity of us -- confirm */
+ UnsetAddr* us;
+} UnsetAddrList;
+
+typedef struct { /* subexp-call payload of a Node (Node.u.call); only with USE_SUBEXP_CALL */
+ int state; /* NST_* status bits, e.g. NST_RECURSION / NST_NAME_REF */
+ int ref_num;
+ UChar* name; /* start of the called name, as passed to node_new_call() */
+ UChar* name_end; /* end of the called name */
+ struct _Node* target; /* EffectNode : EFFECT_MEMORY */
+ UnsetAddrList* unset_addr_list;
+} CallNode;
+
+#endif
+
+typedef struct { /* back-reference payload of a Node (Node.u.backref) */
+ int state; /* NST_* status bits, e.g. NST_NAME_REF / NST_NEST_LEVEL */
+ int back_num; /* presumably the number of referenced groups -- confirm */
+ int back_static[NODE_BACKREFS_SIZE]; /* inline array, selected by BACKREFS_P() when back_dynamic is NULL */
+ int* back_dynamic; /* when non-NULL, BACKREFS_P() selects this instead of back_static */
+ int nest_level;
+} BackrefNode;
+
+typedef struct { /* anchor payload of a Node (Node.u.anchor) */
+ int type; /* presumably an ANCHOR_* value -- confirm in onig_node_new_anchor() */
+ struct _Node* target;
+ int char_len;
+} AnchorNode;
+
+typedef struct _Node { /* parse-tree node: a type tag plus a per-kind payload */
+ int type; /* N_* node type, read with NTYPE() */
+ union { /* members are reached through the NSTRING()/NCCLASS()/NCONS()/... macros */
+ StrNode str;
+ CClassNode cclass;
+ QuantifierNode quantifier;
+ EffectNode effect;
+#ifdef USE_SUBEXP_CALL
+ CallNode call;
+#endif
+ BackrefNode backref;
+ AnchorNode anchor;
+ struct { /* pair used for N_LIST chains; the parser also links N_ALT nodes through .right */
+ struct _Node* left;
+ struct _Node* right;
+ } cons;
+ struct {
+ int type; /* presumably CTYPE_* bits -- confirm */
+ } ctype;
+ } u;
+} Node;
+
+#define NULL_NODE ((Node* )0)
+
+#define SCANENV_MEMNODES_SIZE 8
+#define SCANENV_MEM_NODES(senv) \
+ (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
+ (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+
+typedef struct { /* parser state shared across one onig_parse_make_tree() run */
+ OnigOptionType option; /* seeded from reg->options */
+ OnigAmbigType ambig_flag; /* seeded from reg->ambig_flag */
+ OnigEncoding enc; /* seeded from reg->enc */
+ OnigSyntaxType* syntax; /* seeded from reg->syntax */
+ BitStatusType capture_history;
+ BitStatusType bt_mem_start;
+ BitStatusType bt_mem_end;
+ BitStatusType backrefed_mem;
+ UChar* pattern; /* start of the pattern being parsed */
+ UChar* pattern_end; /* end of the pattern being parsed */
+ UChar* error; /* set by onig_scan_env_set_error_string() */
+ UChar* error_end; /* set by onig_scan_env_set_error_string() */
+ regex_t* reg; /* for reg->names only */
+ int num_call; /* incremented by the parser for every TK_CALL token */
+#ifdef USE_SUBEXP_CALL
+ UnsetAddrList* unset_addr_list;
+#endif
+ int num_mem; /* copied to reg->num_mem after parsing */
+#ifdef USE_NAMED_GROUP
+ int num_named;
+#endif
+ int mem_alloc;
+ Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; /* selected by SCANENV_MEM_NODES() while mem_nodes_dynamic is NULL */
+ Node** mem_nodes_dynamic; /* when non-NULL, SCANENV_MEM_NODES() selects this instead */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int num_comb_exp_check;
+ int comb_exp_max_regnum;
+ int curr_max_regnum;
+ int has_recursion;
+#endif
+} ScanEnv;
+
+
+#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
+#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
+#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+
+
+#ifdef USE_NAMED_GROUP
+typedef struct { /* element of the map passed to onig_renumber_name_table() */
+ int new_val; /* presumably the replacement group number -- confirm */
+} GroupNumRemap;
+
+extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
+#endif
+
+extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
+extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
+extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
+extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
+extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
+extern void onig_node_free P_((Node* node));
+extern Node* onig_node_new_effect P_((int type));
+extern Node* onig_node_new_anchor P_((int type));
+extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
+extern Node* onig_node_new_list P_((Node* left, Node* right));
+extern void onig_node_str_clear P_((Node* node));
+extern int onig_free_node_list P_((void));
+extern int onig_names_free P_((regex_t* reg));
+extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+
+#ifdef ONIG_DEBUG
+#ifdef USE_NAMED_GROUP
+extern int onig_print_names(FILE*, regex_t*);
+#endif
+#endif
+
+#endif /* REGPARSE_H */
diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c
new file mode 100644
index 0000000..e54b5c4
--- /dev/null
+++ b/ext/mbstring/oniguruma/regposerr.c
@@ -0,0 +1,90 @@
+/**********************************************************************
+ regposerr.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "onigposix.h"
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+static char* ESTRING[] = { /* messages indexed by POSIX error code; used by regerror() */
+ NULL, /* index 0 is never looked up: regerror() handles code 0 separately */
+ "failed to match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "invalid collating element referenced", /* REG_ECOLLATE */
+ "invalid character class type referenced", /* REG_ECTYPE */
+ "bad backslash-escape sequence", /* REG_EESCAPE */
+ "invalid back reference number", /* REG_ESUBREG */
+ "imbalanced [ and ]", /* REG_EBRACK */
+ "imbalanced ( and )", /* REG_EPAREN */
+ "imbalanced { and }", /* REG_EBRACE */
+ "invalid repeat range {n,m}", /* REG_BADBR */
+ "invalid range", /* REG_ERANGE */
+ "Out of memory", /* REG_ESPACE */
+ "? * + not preceded by valid regular expression", /* REG_BADRPT */
+
+ /* Extended errors */
+ "internal error", /* REG_EONIG_INTERNAL */
+ "invalid wide char value", /* REG_EONIG_BADWC */
+ "invalid argument", /* REG_EONIG_BADARG */
+ "multi-thread error" /* REG_EONIG_THREAD */
+};
+
+#include <stdio.h>
+
+
+/*
+ * POSIX regerror(): translate `posix_ecode` into a message.
+ *
+ * Copies at most size - 1 bytes of the message into `buf` (when buf is
+ * non-NULL and size > 0) and always NUL-terminates it.  Returns the size
+ * needed for the complete message including the terminator.  `reg` is
+ * not consulted.
+ */
+extern size_t
+regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size)
+{
+  char  fallback[35];
+  char* msg;
+  size_t needed;
+
+  if (posix_ecode == 0) {
+    msg = "";
+  }
+  else if (posix_ecode > 0 &&
+           posix_ecode < sizeof(ESTRING) / sizeof(ESTRING[0])) {
+    msg = ESTRING[posix_ecode];
+  }
+  else {
+    /* code outside the table: build the message on the fly */
+    sprintf(fallback, "undefined error code (%d)", posix_ecode);
+    msg = fallback;
+  }
+
+  /* strlen() is sufficient because every message is plain ASCII */
+  needed = strlen(msg) + 1;
+
+  if (buf != NULL && size > 0) {
+    strncpy(buf, msg, size - 1);
+    buf[size - 1] = '\0';
+  }
+  return needed;
+}
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
new file mode 100644
index 0000000..a3bacf7
--- /dev/null
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -0,0 +1,303 @@
+/**********************************************************************
+ regposix.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define regex_t onig_regex_t
+#include "regint.h"
+#undef regex_t
+#include "onigposix.h"
+
+#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) /* the Oniguruma regex held in a POSIX regex_t */
+#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) /* address of that slot, as handed to onig_new() */
+
+/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
+/*
+ * Store in `len` the byte length of the zero-terminated string `s`.
+ * Encodings whose minimum character length is 1 are scanned byte by byte
+ * for a 0 byte; all others go through onigenc_str_bytelen_null().
+ * Every use of a macro argument is parenthesized so that expression
+ * arguments such as `p + n` are cast and assigned as a whole.
+ */
+#define ENC_STRING_LEN(enc,s,len) do { \
+  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
+    UChar* tmps = (UChar* )(s); \
+    while (*tmps != 0) tmps++; \
+    (len) = tmps - (UChar* )(s); \
+  } \
+  else { \
+    (len) = onigenc_str_bytelen_null(enc, (UChar* )(s)); \
+  } \
+} while(0)
+
+typedef struct { /* one row of the table in onig2posix_error_code() */
+ int onig_err; /* Oniguruma result code to look up */
+ int posix_err; /* POSIX REG_* code returned for it */
+} O2PERR;
+
+/*
+ * Map an Oniguruma result code to a POSIX REG_* error code.
+ * Non-negative codes map to 0; negative codes missing from the table
+ * map to REG_EONIG_INTERNAL.
+ */
+static int
+onig2posix_error_code(int code)
+{
+  static const O2PERR o2p[] = {
+    { ONIG_MISMATCH,                                        REG_NOMATCH },
+    { ONIG_NO_SUPPORT_CONFIG,                               REG_EONIG_INTERNAL },
+    { ONIGERR_MEMORY,                                       REG_ESPACE },
+    { ONIGERR_MATCH_STACK_LIMIT_OVER,                       REG_EONIG_INTERNAL },
+    { ONIGERR_TYPE_BUG,                                     REG_EONIG_INTERNAL },
+    { ONIGERR_PARSER_BUG,                                   REG_EONIG_INTERNAL },
+    { ONIGERR_STACK_BUG,                                    REG_EONIG_INTERNAL },
+    { ONIGERR_UNDEFINED_BYTECODE,                           REG_EONIG_INTERNAL },
+    { ONIGERR_UNEXPECTED_BYTECODE,                          REG_EONIG_INTERNAL },
+    { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED,               REG_EONIG_BADARG },
+    { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
+    { ONIGERR_INVALID_ARGUMENT,                             REG_EONIG_BADARG },
+    { ONIGERR_END_PATTERN_AT_LEFT_BRACE,                    REG_EBRACE },
+    { ONIGERR_END_PATTERN_AT_LEFT_BRACKET,                  REG_EBRACK },
+    { ONIGERR_EMPTY_CHAR_CLASS,                             REG_ECTYPE },
+    { ONIGERR_PREMATURE_END_OF_CHAR_CLASS,                  REG_ECTYPE },
+    { ONIGERR_END_PATTERN_AT_ESCAPE,                        REG_EESCAPE },
+    { ONIGERR_END_PATTERN_AT_META,                          REG_EESCAPE },
+    { ONIGERR_END_PATTERN_AT_CONTROL,                       REG_EESCAPE },
+    { ONIGERR_META_CODE_SYNTAX,                             REG_BADPAT },
+    { ONIGERR_CONTROL_CODE_SYNTAX,                          REG_BADPAT },
+    { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE,             REG_ECTYPE },
+    { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE,           REG_ECTYPE },
+    { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS,      REG_ECTYPE },
+    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED,      REG_BADRPT },
+    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID,            REG_BADRPT },
+    { ONIGERR_NESTED_REPEAT_OPERATOR,                       REG_BADRPT },
+    { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS,                  REG_EPAREN },
+    { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS,       REG_EPAREN },
+    { ONIGERR_END_PATTERN_IN_GROUP,                         REG_BADPAT },
+    { ONIGERR_UNDEFINED_GROUP_OPTION,                       REG_BADPAT },
+    { ONIGERR_INVALID_POSIX_BRACKET_TYPE,                   REG_BADPAT },
+    { ONIGERR_INVALID_LOOK_BEHIND_PATTERN,                  REG_BADPAT },
+    { ONIGERR_INVALID_REPEAT_RANGE_PATTERN,                 REG_BADPAT },
+    { ONIGERR_TOO_BIG_NUMBER,                               REG_BADPAT },
+    { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE,              REG_BADBR },
+    { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE,     REG_BADBR },
+    { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS,                    REG_ECTYPE },
+    { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE,          REG_ECTYPE },
+    { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES,                   REG_ECTYPE },
+    { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING,                  REG_BADPAT },
+    { ONIGERR_TOO_BIG_BACKREF_NUMBER,                       REG_ESUBREG },
+    { ONIGERR_INVALID_BACKREF,                              REG_ESUBREG },
+    { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED,         REG_BADPAT },
+    { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE,                      REG_EONIG_BADWC },
+    { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE,                     REG_EONIG_BADWC },
+    { ONIGERR_INVALID_WIDE_CHAR_VALUE,                      REG_EONIG_BADWC },
+    { ONIGERR_EMPTY_GROUP_NAME,                             REG_BADPAT },
+    { ONIGERR_INVALID_GROUP_NAME,                           REG_BADPAT },
+    { ONIGERR_INVALID_CHAR_IN_GROUP_NAME,                   REG_BADPAT },
+    { ONIGERR_UNDEFINED_NAME_REFERENCE,                     REG_BADPAT },
+    { ONIGERR_UNDEFINED_GROUP_REFERENCE,                    REG_BADPAT },
+    { ONIGERR_MULTIPLEX_DEFINED_NAME,                       REG_BADPAT },
+    { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,               REG_BADPAT },
+    { ONIGERR_NEVER_ENDING_RECURSION,                       REG_BADPAT },
+    { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY,        REG_BADPAT },
+    { ONIGERR_INVALID_CHAR_PROPERTY_NAME,                   REG_BADPAT },
+    { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION,           REG_EONIG_BADARG },
+    { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT,                 REG_EONIG_THREAD }
+  };
+
+  const O2PERR* entry;
+  const O2PERR* const table_end = o2p + sizeof(o2p) / sizeof(o2p[0]);
+
+  /* only negative codes denote a mismatch or an error */
+  if (code >= 0) return 0;
+
+  for (entry = o2p; entry != table_end; entry++) {
+    if (entry->onig_err == code)
+      return entry->posix_err;
+  }
+
+  return REG_EONIG_INTERNAL;  /* but, unknown error code */
+}
+
+/*
+ * POSIX regcomp(): compile the zero-terminated `pattern` into `reg`.
+ *
+ * Without REG_EXTENDED the POSIX basic syntax is used, otherwise
+ * OnigDefaultSyntax.  REG_ICASE and REG_NEWLINE adjust the option set taken
+ * from the chosen syntax.  The pattern is compiled with
+ * OnigEncDefaultCharEncoding.  Returns 0 on success or a POSIX REG_* code.
+ */
+extern int
+regcomp(regex_t* reg, const char* pattern, int posix_options)
+{
+  int status, pat_len;
+  OnigSyntaxType* syntax;
+  OnigOptionType options;
+
+  syntax = ((posix_options & REG_EXTENDED) != 0)
+             ? OnigDefaultSyntax : ONIG_SYNTAX_POSIX_BASIC;
+
+  options = syntax->options;
+  if ((posix_options & REG_ICASE) != 0) {
+    ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
+  }
+  if ((posix_options & REG_NEWLINE) != 0) {
+    ONIG_OPTION_ON(options, ONIG_OPTION_NEGATE_SINGLELINE);
+    ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
+  }
+
+  /* remembered so that regexec() can honour REG_NOSUB */
+  reg->comp_options = posix_options;
+
+  ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, pat_len);
+  status = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + pat_len),
+                    options, OnigEncDefaultCharEncoding, syntax,
+                    (OnigErrorInfo* )NULL);
+  if (status == ONIG_NORMAL) {
+    reg->re_nsub = ONIG_C(reg)->num_mem;
+    return 0;
+  }
+
+  return onig2posix_error_code(status);
+}
+
+/*
+ * POSIX regexec(): search the zero-terminated `str` with the compiled `reg`.
+ *
+ * When the caller supplies fewer than num_mem + 1 match slots, a temporary
+ * array of num_mem + 1 slots is used for the search and the first `nmatch`
+ * entries are copied back on a match.  With nmatch == 0 or REG_NOSUB no
+ * match positions are reported.  Returns 0 on a match, REG_NOMATCH on a
+ * mismatch, or another POSIX REG_* code on error.
+ */
+extern int
+regexec(regex_t* reg, const char* str, size_t nmatch,
+        regmatch_t pmatch[], int posix_options)
+{
+  int status, idx, len;
+  int owns_region;
+  UChar* str_end;
+  regmatch_t* region;
+  OnigOptionType options;
+
+  options = ONIG_OPTION_POSIX_REGION;
+  if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
+  if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
+
+  if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
+    region = (regmatch_t* )NULL;
+    nmatch = 0;
+  }
+  else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
+    region = (regmatch_t* )xmalloc(sizeof(regmatch_t)
+                                   * (ONIG_C(reg)->num_mem + 1));
+    if (region == NULL)
+      return REG_ESPACE;
+  }
+  else {
+    region = pmatch;
+  }
+
+  /* true only for the temporary array allocated above */
+  owns_region = (region != pmatch && region != NULL);
+
+  ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
+  str_end = (UChar* )(str + len);
+  status = onig_search(ONIG_C(reg), (UChar* )str, str_end, (UChar* )str, str_end,
+                       (OnigRegion* )region, options);
+
+  if (status >= 0) {
+    status = 0;  /* Match */
+    if (owns_region) {
+      xmemcpy(pmatch, region, sizeof(regmatch_t) * nmatch);
+    }
+  }
+  else if (status == ONIG_MISMATCH) {
+    status = REG_NOMATCH;
+    for (idx = 0; idx < (int )nmatch; idx++)
+      pmatch[idx].rm_so = pmatch[idx].rm_eo = ONIG_REGION_NOTPOS;
+  }
+  else {
+    status = onig2posix_error_code(status);
+  }
+
+  if (owns_region)
+    xfree(region);
+
+#if 0
+  if (reg->re_nsub > nmatch - 1)
+    reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
+#endif
+
+  return status;
+}
+
+extern void
+regfree(regex_t* reg)
+{
+ onig_free(ONIG_C(reg)); /* releases the Oniguruma regex stored in reg->onig; reg itself is not freed here */
+}
+
+
+/*
+ * Select the default character encoding from a REG_POSIX_ENCODING_* code.
+ * Unknown codes are ignored and leave the current default untouched.
+ */
+extern void
+reg_set_encoding(int mb_code)
+{
+  switch (mb_code) {
+  case REG_POSIX_ENCODING_ASCII:
+    onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
+    break;
+  case REG_POSIX_ENCODING_EUC_JP:
+    onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
+    break;
+  case REG_POSIX_ENCODING_SJIS:
+    onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
+    break;
+  case REG_POSIX_ENCODING_UTF8:
+    onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
+    break;
+  case REG_POSIX_ENCODING_UTF16_BE:
+    onigenc_set_default_encoding(ONIG_ENCODING_UTF16_BE);
+    break;
+  case REG_POSIX_ENCODING_UTF16_LE:
+    onigenc_set_default_encoding(ONIG_ENCODING_UTF16_LE);
+    break;
+
+  default:
+    /* unsupported code: nothing to do */
+    break;
+  }
+}
+
+extern int
+reg_name_to_group_numbers(regex_t* reg,
+ const unsigned char* name, const unsigned char* name_end, int** nums)
+{
+ return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); /* thin wrapper: forwards to the Oniguruma regex held in reg */
+}
+
+typedef struct { /* context handed through onig_foreach_name() to i_wrapper() */
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); /* caller's callback, which expects the POSIX regex_t */
+ regex_t* reg; /* POSIX regex passed back to func */
+ void* arg; /* caller's opaque argument, forwarded to func */
+} i_wrap;
+
+static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
+ int ng, int* gs,
+ onig_regex_t* reg, void* arg)
+{
+ i_wrap* warg = (i_wrap* )arg; /* arg is the i_wrap set up by reg_foreach_name() */
+
+ return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); /* the onig_regex_t `reg` is ignored; the stored POSIX regex_t is passed instead */
+}
+
+extern int
+reg_foreach_name(regex_t* reg,
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ i_wrap warg; /* bundles func, reg and arg so i_wrapper() can call func with the POSIX regex_t */
+
+ warg.func = func;
+ warg.reg = reg;
+ warg.arg = arg;
+
+ return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); /* returns whatever onig_foreach_name() returns */
+}
+
+extern int
+reg_number_of_names(regex_t* reg)
+{
+ return onig_number_of_names(ONIG_C(reg)); /* thin wrapper: forwards to the Oniguruma regex held in reg */
+}
diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c
new file mode 100644
index 0000000..9114e39
--- /dev/null
+++ b/ext/mbstring/oniguruma/regsyntax.c
@@ -0,0 +1,236 @@
+/**********************************************************************
+ regsyntax.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+/*
+ * OnigSyntaxASIS: no ONIG_SYN_OP_* operator and no behavior flag is
+ * enabled; the only bit set is ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
+ * The initializers are positional.  Judging by the flag prefixes and
+ * the onig_get_syntax_*() accessors, the order is op, op2, behavior,
+ * options (the struct itself is declared elsewhere).
+ */
+OnigSyntaxType OnigSyntaxASIS = {
+ 0
+ , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
+ , 0
+ , ONIG_OPTION_NONE
+};
+/*
+ * OnigSyntaxPosixBasic: SYN_POSIX_COMMON_OP plus the ESC_ forms of
+ * sub-expression and brace-interval operators; no OP2 or behavior
+ * flags; options are SINGLELINE | MULTILINE.
+ */
+OnigSyntaxType OnigSyntaxPosixBasic = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL )
+ , 0
+ , 0
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+/*
+ * OnigSyntaxPosixExtended: SYN_POSIX_COMMON_OP plus the unescaped forms
+ * of sub-expression, brace interval, '+', '?' and '|' operators, with
+ * the context/repeat behavior flags listed in the third initializer.
+ */
+OnigSyntaxType OnigSyntaxPosixExtended = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
+ ONIG_SYN_OP_BRACE_INTERVAL |
+ ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
+ , 0
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+/*
+ * OnigSyntaxEmacs: operator set spelled out flag by flag (it does not
+ * build on a SYN_*_OP base macro); OP2 adds the GNU buffer anchors and
+ * the only behavior flag allows an empty range in a character class.
+ */
+OnigSyntaxType OnigSyntaxEmacs = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL |
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
+ ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
+ ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
+ , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+ , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ , ONIG_OPTION_NONE
+};
+/*
+ * OnigSyntaxGrep: operator set spelled out flag by flag; note that the
+ * '+' and '?' repeat operators are enabled only in their ESC_ forms.
+ * No OP2 flags; two character-class behavior flags.
+ */
+OnigSyntaxType OnigSyntaxGrep = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
+ ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
+ , 0
+ , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
+ , ONIG_OPTION_NONE
+};
+/*
+ * OnigSyntaxGnuRegex: exactly SYN_GNU_REGEX_OP and SYN_GNU_REGEX_BV
+ * (both defined outside this file), no OP2 flags, no options.
+ */
+OnigSyntaxType OnigSyntaxGnuRegex = {
+ SYN_GNU_REGEX_OP
+ , 0
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_NONE
+};
+/*
+ * OnigSyntaxJava: SYN_GNU_REGEX_OP extended with the flags listed, then
+ * with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out (the "& ~" term
+ * applies to the whole OR-ed group).  Behavior is SYN_GNU_REGEX_BV plus
+ * ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND.
+ */
+OnigSyntaxType OnigSyntaxJava = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
+ ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
+ , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
+ , ONIG_OPTION_SINGLELINE
+};
+/*
+ * OnigSyntaxPerl: SYN_GNU_REGEX_OP extended with the flags listed, then
+ * with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out.  Behavior is
+ * plain SYN_GNU_REGEX_BV.  OnigSyntaxPerl_NG repeats these values and
+ * adds the named-group flags.
+ */
+OnigSyntaxType OnigSyntaxPerl = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_SINGLELINE
+};
+
+/*
+ * Perl + named group.
+ * Same first initializer as OnigSyntaxPerl; the second adds
+ * QMARK_LT_NAMED_GROUP, ESC_K_NAMED_BACKREF and ESC_G_SUBEXP_CALL, and
+ * the third adds CAPTURE_ONLY_NAMED_GROUP and
+ * ALLOW_MULTIPLEX_DEFINITION_NAME.
+ */
+OnigSyntaxType OnigSyntaxPerl_NG = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
+ ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
+ , ONIG_OPTION_SINGLELINE
+};
+
+
+
+/*
+ * Store `syntax` in OnigDefaultSyntax.  A NULL argument selects
+ * ONIG_SYNTAX_RUBY instead.  Always returns 0.
+ */
+extern int
+onig_set_default_syntax(OnigSyntaxType* syntax)
+{
+  OnigDefaultSyntax = IS_NULL(syntax) ? ONIG_SYNTAX_RUBY : syntax;
+  return 0;
+}
+/*
+ * Copy the whole syntax description *from into *to by struct
+ * assignment.  Neither pointer is checked for NULL.
+ */
+extern void
+onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+{
+ *to = *from;
+}
+/* Replace (not OR into) the `op` flag word of *syntax. */
+extern void
+onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+{
+ syntax->op = op;
+}
+/* Replace (not OR into) the `op2` flag word of *syntax. */
+extern void
+onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+{
+ syntax->op2 = op2;
+}
+/* Replace (not OR into) the `behavior` flag word of *syntax. */
+extern void
+onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+{
+ syntax->behavior = behavior;
+}
+/* Replace the `options` member of *syntax. */
+extern void
+onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+{
+ syntax->options = options;
+}
+/* Return the `op` flag word of *syntax. */
+extern unsigned int
+onig_get_syntax_op(OnigSyntaxType* syntax)
+{
+ return syntax->op;
+}
+/* Return the `op2` flag word of *syntax. */
+extern unsigned int
+onig_get_syntax_op2(OnigSyntaxType* syntax)
+{
+ return syntax->op2;
+}
+/* Return the `behavior` flag word of *syntax. */
+extern unsigned int
+onig_get_syntax_behavior(OnigSyntaxType* syntax)
+{
+ return syntax->behavior;
+}
+/* Return the `options` member of *syntax. */
+extern OnigOptionType
+onig_get_syntax_options(OnigSyntaxType* syntax)
+{
+ return syntax->options;
+}
+
+#ifdef USE_VARIABLE_META_CHARS
+/*
+ * Assign code point `code` to one slot of enc->meta_char_table, chosen
+ * by `what` (an ONIG_META_CHAR_* selector).  Returns 0 on success and
+ * ONIGERR_INVALID_ARGUMENT, without modifying the table, when `what`
+ * is not one of the selectors handled below.
+ */
+extern int onig_set_meta_char(OnigEncoding enc,
+                              unsigned int what, OnigCodePoint code)
+{
+  int status = 0;
+
+  switch (what) {
+  case ONIG_META_CHAR_ESCAPE:
+    enc->meta_char_table.esc = code;
+    break;
+  case ONIG_META_CHAR_ANYCHAR:
+    enc->meta_char_table.anychar = code;
+    break;
+  case ONIG_META_CHAR_ANYTIME:
+    enc->meta_char_table.anytime = code;
+    break;
+  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
+    enc->meta_char_table.zero_or_one_time = code;
+    break;
+  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
+    enc->meta_char_table.one_or_more_time = code;
+    break;
+  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
+    enc->meta_char_table.anychar_anytime = code;
+    break;
+  default:
+    status = ONIGERR_INVALID_ARGUMENT;
+    break;
+  }
+
+  return status;
+}
+#endif /* USE_VARIABLE_META_CHARS */
diff --git a/ext/mbstring/oniguruma/regtrav.c b/ext/mbstring/oniguruma/regtrav.c
new file mode 100644
index 0000000..58a17f5
--- /dev/null
+++ b/ext/mbstring/oniguruma/regtrav.c
@@ -0,0 +1,76 @@
+/**********************************************************************
+ regtrav.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+#ifdef USE_CAPTURE_HISTORY
+
+/*
+ * Depth-first walk over a capture-history tree.
+ *
+ * For each node the callback receives (group, beg, end, level, at, arg).
+ * It is called before the children when `at` contains
+ * ONIG_TRAVERSE_CALLBACK_AT_FIRST and after them when it contains
+ * ONIG_TRAVERSE_CALLBACK_AT_LAST; both may be set.  The first non-zero
+ * callback result stops the walk and is returned; otherwise 0.
+ * A null node is an empty tree and yields 0.
+ */
+static int
+capture_tree_traverse(OnigCaptureTreeNode* node, int at,
+                      int(*callback_func)(int,int,int,int,int,void*),
+                      int level, void* arg)
+{
+  int status;
+  int child;
+
+  if (node == (OnigCaptureTreeNode* )0)
+    return 0;
+
+  if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
+    status = (*callback_func)(node->group, node->beg, node->end,
+                              level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
+    if (status != 0)
+      return status;
+  }
+
+  child = 0;
+  while (child < node->num_childs) {
+    status = capture_tree_traverse(node->childs[child], at,
+                                   callback_func, level + 1, arg);
+    if (status != 0)
+      return status;
+    child++;
+  }
+
+  if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) == 0)
+    return 0;
+
+  /* Last step: its result (zero or not) is the result of the walk. */
+  return (*callback_func)(node->group, node->beg, node->end,
+                          level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
+}
+#endif /* USE_CAPTURE_HISTORY */
+/*
+ * Public entry point for walking region->history_root with
+ * capture_tree_traverse(), starting at level 0.  When the library is
+ * built without USE_CAPTURE_HISTORY the arguments are ignored and
+ * ONIG_NO_SUPPORT_CONFIG is returned.
+ */
+extern int
+onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*callback_func)(int,int,int,int,int,void*), void* arg)
+{
+#ifdef USE_CAPTURE_HISTORY
+ return capture_tree_traverse(region->history_root, at,
+ callback_func, 0, arg);
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
+}
diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c
new file mode 100644
index 0000000..5fad0cc
--- /dev/null
+++ b/ext/mbstring/oniguruma/regversion.c
@@ -0,0 +1,55 @@
+/**********************************************************************
+ regversion.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "oniguruma.h"
+#include <stdio.h>
+
+/*
+ * Return the library version formatted as "MAJOR.MINOR.TEENY".
+ * The text lives in a 12-byte function-local static buffer that is
+ * rewritten on every call.
+ */
+extern const char*
+onig_version(void)
+{
+  static char version_text[12];
+
+  sprintf(version_text, "%d.%d.%d",
+          ONIGURUMA_VERSION_MAJOR,
+          ONIGURUMA_VERSION_MINOR,
+          ONIGURUMA_VERSION_TEENY);
+
+  return version_text;
+}
+
+/*
+ * Return the copyright banner, including the library version.
+ * The text lives in a 58-byte function-local static buffer that is
+ * rewritten on every call.
+ */
+extern const char*
+onig_copyright(void)
+{
+  static char banner[58];
+
+  sprintf(banner, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako",
+          ONIGURUMA_VERSION_MAJOR,
+          ONIGURUMA_VERSION_MINOR,
+          ONIGURUMA_VERSION_TEENY);
+
+  return banner;
+}
diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c
new file mode 100644
index 0000000..2324da2
--- /dev/null
+++ b/ext/mbstring/oniguruma/st.c
@@ -0,0 +1,589 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <malloc.h>
+#endif
+
+#ifdef NOT_RUBY
+#include "regint.h"
+#else
+#ifdef RUBY_PLATFORM
+#define xmalloc ruby_xmalloc
+#define xcalloc ruby_xcalloc
+#define xrealloc ruby_xrealloc
+#define xfree ruby_xfree
+
+void *xmalloc(long);
+void *xcalloc(long, long);
+void *xrealloc(void *, long);
+void xfree(void *);
+#endif
+#endif
+
+#include "st.h"
+/*
+ * One key/record pair.  Entries that land in the same bin form a singly
+ * linked chain through `next`.
+ */
+typedef struct st_table_entry st_table_entry;
+
+struct st_table_entry {
+ unsigned int hash; /* full hash of key, stored before reduction modulo num_bins */
+ st_data_t key;
+ st_data_t record; /* value associated with key */
+ st_table_entry *next; /* next entry in the same bin, or 0 */
+};
+
+#define ST_DEFAULT_MAX_DENSITY 5
+#define ST_DEFAULT_INIT_TABLE_SIZE 11
+
+ /*
+ * ST_DEFAULT_MAX_DENSITY is the default upper limit on the average
+ * number of items per bin; once it is exceeded the number of bins
+ * is increased.
+ *
+ * ST_DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
+ * allocated initially.
+ *
+ */
+/* Hash type for numeric keys: numcmp() compares, numhash() hashes. */
+static int numcmp(long, long);
+static int numhash(long);
+static struct st_hash_type type_numhash = {
+ numcmp,
+ numhash,
+};
+/* Hash type for NUL-terminated string keys: strcmp() compares, strhash() hashes. */
+/* extern int strcmp(const char *, const char *); */
+static int strhash(const char *);
+static struct st_hash_type type_strhash = {
+ strcmp,
+ strhash,
+};
+
+static void rehash(st_table *);
+/* Allocation shorthands built on xmalloc()/xcalloc(); neither checks the result. */
+#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
+#define Calloc(n,s) (char*)xcalloc((n),(s))
+/* Two keys match when they are identical or the table's compare() returns 0. */
+#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
+/* do_hash: full hash of key; do_hash_bin: that hash reduced modulo num_bins. */
+#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
+#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
+
+/*
+ * MINSIZE is the minimum size of a dictionary.
+ */
+
+#define MINSIZE 8
+
+/*
+Table of prime numbers 2^n+a, 3<=n<=30.
+*/
+static const long primes[] = { /* entry i is a prime slightly above MINSIZE << i */
+ 8 + 3,
+ 16 + 3,
+ 32 + 5,
+ 64 + 3,
+ 128 + 3,
+ 256 + 27,
+ 512 + 9,
+ 1024 + 9,
+ 2048 + 5,
+ 4096 + 3,
+ 8192 + 27,
+ 16384 + 43,
+ 32768 + 3,
+ 65536 + 45,
+ 131072 + 29,
+ 262144 + 3,
+ 524288 + 21,
+ 1048576 + 7,
+ 2097152 + 17,
+ 4194304 + 15,
+ 8388608 + 9,
+ 16777216 + 43,
+ 33554432 + 35,
+ 67108864 + 15,
+ 134217728 + 29,
+ 268435456 + 3,
+ 536870912 + 11,
+ 1073741824 + 85,
+ 0 /* terminating entry, not a prime */
+};
+
+/*
+ * Return the number of bins to use for a requested size: the prime
+ * paired with the first power of two (starting at MINSIZE) that is
+ * strictly greater than `size`.  Returns -1 when `size` is at or beyond
+ * the last power of two covered by primes[]; callers must treat that as
+ * failure.
+ */
+static int
+new_size(size)
+ int size;
+{
+  int i;
+  /* The last slot of primes[] is the 0 terminator, not a usable size. */
+  const int nprimes = (int )(sizeof(primes)/sizeof(primes[0])) - 1;
+  /* Unsigned, so the final doubling (2^30 -> 2^31) cannot overflow. */
+  unsigned long threshold = MINSIZE;
+
+  for (i = 0; i < nprimes; i++, threshold <<= 1) {
+    /* A negative request is below every threshold. */
+    if (size < 0 || threshold > (unsigned long )size)
+      return (int )primes[i];
+  }
+
+  /* Ran out of primes */
+  return -1;
+}
+
+#ifdef HASH_LOG
+static int collision = 0;
+static int init_st = 0;
+
+/*
+ * atexit() handler registered by st_init_table_with_size(): writes the
+ * collision counter to /tmp/col.  If the file cannot be opened the
+ * report is silently dropped.
+ */
+static void
+stat_col()
+{
+  FILE *f = fopen("/tmp/col", "w");
+
+  if (f == NULL)
+    return;  /* fprintf()/fclose() on a null stream is undefined */
+
+  fprintf(f, "collision: %d\n", collision);
+  fclose(f);
+}
+#endif
+
+/*
+ * Create an empty table that uses `type` for hashing and comparison.
+ * `size` is rounded up to a prime bin count by new_size().
+ *
+ * Returns the new table, or 0 when the size cannot be satisfied or an
+ * allocation returns NULL (nothing is leaked in that case).
+ */
+st_table*
+st_init_table_with_size(type, size)
+ struct st_hash_type *type;
+ int size;
+{
+  st_table *tbl;
+
+#ifdef HASH_LOG
+  if (init_st == 0) {
+    init_st = 1;
+    atexit(stat_col);
+  }
+#endif
+
+  size = new_size(size); /* round up to prime number */
+  if (size <= 0)
+    return 0; /* no usable bin count: a 0 or negative num_bins would break '%' */
+
+  tbl = alloc(st_table);
+  if (tbl == 0)
+    return 0;
+
+  tbl->type = type;
+  tbl->num_entries = 0;
+  tbl->num_bins = size;
+  tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+  if (tbl->bins == 0) {
+    xfree(tbl);
+    return 0;
+  }
+
+  return tbl;
+}
+/*
+ * Create a table of the given hash type with the smallest bin count:
+ * a requested size of 0 makes new_size() pick the first prime.
+ */
+st_table*
+st_init_table(type)
+ struct st_hash_type *type;
+{
+ return st_init_table_with_size(type, 0);
+}
+/* Create a default-sized table that uses type_numhash (numeric keys). */
+st_table*
+st_init_numtable(void)
+{
+ return st_init_table(&type_numhash);
+}
+/* Create a table that uses type_numhash, sized for at least `size`. */
+st_table*
+st_init_numtable_with_size(size)
+ int size;
+{
+ return st_init_table_with_size(&type_numhash, size);
+}
+/* Create a default-sized table that uses type_strhash (string keys). */
+st_table*
+st_init_strtable(void)
+{
+ return st_init_table(&type_strhash);
+}
+/* Create a table that uses type_strhash, sized for at least `size`. */
+st_table*
+st_init_strtable_with_size(size)
+ int size;
+{
+ return st_init_table_with_size(&type_strhash, size);
+}
+
+/*
+ * Release every entry, the bin array and the table itself.  Keys and
+ * records are not touched: whatever they refer to stays owned by the
+ * caller.
+ *
+ * Memory is returned with xfree() because it was obtained through the
+ * xmalloc()/xcalloc() wrappers (see alloc/Calloc).
+ */
+void
+st_free_table(table)
+ st_table *table;
+{
+  register st_table_entry *ptr, *next;
+  int i;
+
+  for(i = 0; i < table->num_bins; i++) {
+    ptr = table->bins[i];
+    while (ptr != 0) {
+      next = ptr->next;  /* read the link before the node goes away */
+      xfree(ptr);
+      ptr = next;
+    }
+  }
+  xfree(table->bins);
+  xfree(table);
+}
+/* True when ptr is a live entry that does NOT match (hash_val, key). */
+#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
+((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
+/* COLLISION bumps the collision counter only in HASH_LOG builds. */
+#ifdef HASH_LOG
+#define COLLISION collision++
+#else
+#define COLLISION
+#endif
+/*
+ * Set bin_pos to the bin of hash_val and ptr to the matching entry in
+ * that bin, or to 0 when there is none.  Note that `key` is not a macro
+ * parameter: it is taken from the scope of the expansion site.
+ */
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
+ bin_pos = hash_val%(table)->num_bins;\
+ ptr = (table)->bins[bin_pos];\
+ if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
+ COLLISION;\
+ while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
+ ptr = ptr->next;\
+ }\
+ ptr = ptr->next;\
+ }\
+} while (0)
+
+/*
+ * Look `key` up in `table`.  Returns 1 and, when `value` is non-null,
+ * stores the associated record in *value; returns 0 and leaves *value
+ * alone when the key is absent.
+ */
+int
+st_lookup(table, key, value)
+ st_table *table;
+ register st_data_t key;
+ st_data_t *value;
+{
+  unsigned int full_hash, bin;
+  register st_table_entry *hit;
+
+  full_hash = do_hash(key, table);
+  FIND_ENTRY(table, hit, full_hash, bin);  /* reads `key` from this scope */
+
+  if (hit == 0)
+    return 0;
+
+  if (value != 0)
+    *value = hit->record;
+  return 1;
+}
+/*
+ * Push a new entry for key/value onto the head of bin `bin_pos` without
+ * looking for an existing one.  The table is rehashed first (and
+ * bin_pos recomputed) when num_entries/num_bins, an integer division,
+ * exceeds ST_DEFAULT_MAX_DENSITY.
+ * The arguments for `key` and `value` must be spelled exactly like the
+ * parameters: the member name in `entry->key` is substituted as well.
+ * NOTE(review): the result of alloc() is dereferenced without a check.
+ */
+#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
+do {\
+ st_table_entry *entry;\
+ if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
+ rehash(table);\
+ bin_pos = hash_val % table->num_bins;\
+ }\
+ \
+ entry = alloc(st_table_entry);\
+ \
+ entry->hash = hash_val;\
+ entry->key = key;\
+ entry->record = value;\
+ entry->next = table->bins[bin_pos];\
+ table->bins[bin_pos] = entry;\
+ table->num_entries++;\
+} while (0)
+
+/*
+ * Associate `value` with `key`.  Returns 1 when an existing entry was
+ * overwritten and 0 when a new entry was added.
+ */
+int
+st_insert(table, key, value)
+ register st_table *table;
+ register st_data_t key;
+ st_data_t value;
+{
+  unsigned int full_hash, bin;
+  register st_table_entry *hit;
+
+  full_hash = do_hash(key, table);
+  FIND_ENTRY(table, hit, full_hash, bin);  /* reads `key` from this scope */
+
+  if (hit != 0) {
+    hit->record = value;
+    return 1;
+  }
+
+  ADD_DIRECT(table, key, value, full_hash, bin);
+  return 0;
+}
+/*
+ * Add key/value without checking whether the key is already present;
+ * an existing entry for the same key is neither found nor replaced.
+ */
+void
+st_add_direct(table, key, value)
+ st_table *table;
+ st_data_t key;
+ st_data_t value;
+{
+ unsigned int hash_val, bin_pos;
+
+ hash_val = do_hash(key, table);
+ bin_pos = hash_val % table->num_bins;
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
+}
+
+/*
+ * Grow the bin array to the next prime size and redistribute every
+ * entry using its stored full hash (keys are not hashed again).
+ *
+ * If no larger size exists or the new array cannot be allocated, the
+ * table is left exactly as it was: still valid, only denser.
+ */
+static void
+rehash(table)
+ register st_table *table;
+{
+  register st_table_entry *ptr, *next, **new_bins;
+  int i, old_num_bins = table->num_bins, new_num_bins;
+  unsigned int hash_val;
+
+  new_num_bins = new_size(old_num_bins+1);
+  if (new_num_bins <= 0)
+    return;  /* new_size() ran out of primes */
+
+  new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
+  if (new_bins == 0)
+    return;  /* keep the old bins rather than dereference NULL */
+
+  for(i = 0; i < old_num_bins; i++) {
+    ptr = table->bins[i];
+    while (ptr != 0) {
+      next = ptr->next;
+      hash_val = ptr->hash % new_num_bins;
+      ptr->next = new_bins[hash_val];
+      new_bins[hash_val] = ptr;
+      ptr = next;
+    }
+  }
+  xfree(table->bins);  /* obtained through xcalloc(), so release with xfree() */
+  table->num_bins = new_num_bins;
+  table->bins = new_bins;
+}
+
+/*
+ * Return a copy of old_table with its own bin array and entries.  Keys
+ * and records are copied as plain st_data_t values.  Entries are pushed
+ * onto the head of each new chain, so the order within a bin is
+ * reversed.
+ *
+ * Returns 0 when an allocation fails; everything allocated up to that
+ * point, including entries already copied, is released first.
+ */
+st_table*
+st_copy(old_table)
+ st_table *old_table;
+{
+  st_table *new_table;
+  st_table_entry *ptr, *entry, *next;
+  int i, j, num_bins = old_table->num_bins;
+
+  new_table = alloc(st_table);
+  if (new_table == 0) {
+    return 0;
+  }
+
+  *new_table = *old_table;
+  new_table->bins = (st_table_entry**)
+    Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+
+  if (new_table->bins == 0) {
+    xfree(new_table);
+    return 0;
+  }
+
+  for(i = 0; i < num_bins; i++) {
+    new_table->bins[i] = 0;
+    ptr = old_table->bins[i];
+    while (ptr != 0) {
+      entry = alloc(st_table_entry);
+      if (entry == 0) {
+        /* Undo the partial copy: bins 0..i may already hold entries. */
+        for (j = 0; j <= i; j++) {
+          for (entry = new_table->bins[j]; entry != 0; entry = next) {
+            next = entry->next;
+            xfree(entry);
+          }
+        }
+        xfree(new_table->bins);
+        xfree(new_table);
+        return 0;
+      }
+      *entry = *ptr;
+      entry->next = new_table->bins[i];
+      new_table->bins[i] = entry;
+      ptr = ptr->next;
+    }
+  }
+  return new_table;
+}
+
+/*
+ * Remove the entry matching *key.
+ *
+ * On success returns 1, stores the key held by the table in *key and,
+ * when `value` is non-null, the removed record in *value.
+ * On a miss returns 0 and, when `value` is non-null, sets *value to 0.
+ * This now holds for every miss, not only when the bin is empty.
+ */
+int
+st_delete(table, key, value)
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
+{
+  unsigned int hash_val;
+  st_table_entry *tmp;
+  register st_table_entry *ptr;
+
+  hash_val = do_hash_bin(*key, table);
+  ptr = table->bins[hash_val];
+
+  if (ptr == 0) {
+    if (value != 0) *value = 0;
+    return 0;
+  }
+
+  /* The head of the chain is unlinked through the bin slot itself. */
+  if (EQUAL(table, *key, ptr->key)) {
+    table->bins[hash_val] = ptr->next;
+    table->num_entries--;
+    if (value != 0) *value = ptr->record;
+    *key = ptr->key;
+    xfree(ptr);
+    return 1;
+  }
+
+  /* Otherwise look one node ahead so the predecessor can be relinked. */
+  for(; ptr->next != 0; ptr = ptr->next) {
+    if (EQUAL(table, ptr->next->key, *key)) {
+      tmp = ptr->next;
+      ptr->next = ptr->next->next;
+      table->num_entries--;
+      if (value != 0) *value = tmp->record;
+      *key = tmp->key;
+      xfree(tmp);
+      return 1;
+    }
+  }
+
+  if (value != 0) *value = 0;
+  return 0;
+}
+
+/*
+ * Delete without unlinking: the matching entry stays in its chain, but
+ * its key and record are overwritten with the marker `never`, and
+ * num_entries is decremented.  st_cleanup_safe() later removes such
+ * entries for real.
+ *
+ * On success returns 1, stores the key held by the table in *key and,
+ * when `value` is non-null, the old record in *value.
+ * On a miss returns 0 and, when `value` is non-null, sets *value to 0.
+ * This now holds for every miss, not only when the bin is empty.
+ */
+int
+st_delete_safe(table, key, value, never)
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
+ st_data_t never;
+{
+  unsigned int hash_val;
+  register st_table_entry *ptr;
+
+  hash_val = do_hash_bin(*key, table);
+
+  for(ptr = table->bins[hash_val]; ptr != 0; ptr = ptr->next) {
+    /* Entries already marked with `never` are skipped. */
+    if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
+      table->num_entries--;
+      *key = ptr->key;
+      if (value != 0) *value = ptr->record;
+      ptr->key = ptr->record = never;
+      return 1;
+    }
+  }
+
+  if (value != 0) *value = 0;
+  return 0;
+}
+/*
+ * st_foreach() callback used by st_cleanup_safe(): asks for deletion of
+ * every entry whose record equals `never`; `key` is not examined.
+ */
+static int
+delete_never(key, value, never)
+ st_data_t key, value, never;
+{
+ if (value == never) return ST_DELETE;
+ return ST_CONTINUE;
+}
+/*
+ * Physically remove the entries that st_delete_safe() marked with
+ * `never`.  num_entries is saved and restored around the sweep because
+ * st_delete_safe() already decremented it for each marked entry and the
+ * ST_DELETE path of st_foreach() decrements it again.
+ */
+void
+st_cleanup_safe(table, never)
+ st_table *table;
+ st_data_t never;
+{
+ int num_entries = table->num_entries;
+
+ st_foreach(table, delete_never, never);
+ table->num_entries = num_entries;
+}
+
+/*
+ * Call func(key, record, arg) for every entry, bin by bin.  The value
+ * returned by func steers the walk:
+ *   ST_CONTINUE  go on to the next entry;
+ *   ST_CHECK     like ST_CONTINUE, but first verify that the current
+ *                entry is still linked in its bin; if it is not, the
+ *                walk stops and 1 is returned;
+ *   ST_STOP      stop and return 0;
+ *   ST_DELETE    unlink and release the current entry, then go on.
+ * Returns 0 after a complete walk.
+ *
+ * Deleted entries are released with xfree(), matching the xmalloc()
+ * wrapper they were allocated with.
+ * NOTE(review): a return value outside st_retval advances nothing, so
+ * the same entry would be visited again.
+ */
+int
+st_foreach(table, func, arg)
+ st_table *table;
+ int (*func)();
+ st_data_t arg;
+{
+  st_table_entry *ptr, *last, *tmp;
+  enum st_retval retval;
+  int i;
+
+  for(i = 0; i < table->num_bins; i++) {
+    last = 0;
+    for(ptr = table->bins[i]; ptr != 0;) {
+      retval = (*func)(ptr->key, ptr->record, arg);
+      switch (retval) {
+      case ST_CHECK: /* check if hash is modified during iteration */
+        tmp = 0;
+        if (i < table->num_bins) {
+          for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
+            if (tmp == ptr) break;
+          }
+        }
+        if (!tmp) {
+          /* call func with error notice */
+          return 1;
+        }
+        /* fall through */
+      case ST_CONTINUE:
+        last = ptr;
+        ptr = ptr->next;
+        break;
+      case ST_STOP:
+        return 0;
+      case ST_DELETE:
+        tmp = ptr;
+        if (last == 0) {
+          table->bins[i] = ptr->next;
+        }
+        else {
+          last->next = ptr->next;
+        }
+        ptr = ptr->next;
+        xfree(tmp);
+        table->num_entries--;
+      }
+    }
+  }
+  return 0;
+}
+/*
+ * Hash a NUL-terminated string.  One of three algorithms is compiled
+ * in: the HASH_ELFHASH variant, the HASH_PERL variant, or (default) a
+ * multiply-by-997 accumulation folded with val>>5.
+ * NOTE(review): characters are read through plain `char` and the
+ * default and HASH_PERL variants accumulate in a signed int, so results
+ * depend on char signedness and on signed overflow behaviour.
+ */
+static int
+strhash(string)
+ register const char *string;
+{
+ register int c;
+
+#ifdef HASH_ELFHASH
+ register unsigned int h = 0, g;
+
+ while ((c = *string++) != '\0') {
+ h = ( h << 4 ) + c;
+ if ( g = h & 0xF0000000 ) /* assignment intended: g keeps the top nibble */
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+#elif HASH_PERL
+ register int val = 0;
+
+ while ((c = *string++) != '\0') {
+ val += c;
+ val += (val << 10);
+ val ^= (val >> 6);
+ }
+ val += (val << 3);
+ val ^= (val >> 11);
+
+ return val + (val << 15);
+#else
+ register int val = 0;
+
+ while ((c = *string++) != '\0') {
+ val = val*997 + c;
+ }
+
+ return val + (val>>5);
+#endif
+}
+/*
+ * Comparison function of type_numhash: returns 0 when x and y are equal
+ * and 1 otherwise (it does not order the values).
+ */
+static int
+numcmp(x, y)
+ long x, y;
+{
+ return x != y;
+}
+/*
+ * Hash function of type_numhash: the key itself, converted from long to
+ * the int return type.
+ */
+static int
+numhash(n)
+ long n;
+{
+ return n;
+}
diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h
new file mode 100644
index 0000000..da65e7f
--- /dev/null
+++ b/ext/mbstring/oniguruma/st.h
@@ -0,0 +1,63 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* @(#) st.h 5.1 89/12/14 */
+
+#ifndef ST_INCLUDED
+
+#define ST_INCLUDED
+
+typedef unsigned long st_data_t;
+#define ST_DATA_T_DEFINED
+
+typedef struct st_table st_table;
+/* Pair of callbacks that defines how a table treats its keys. */
+struct st_hash_type {
+ int (*compare)(); /* returns 0 when two keys are equal */
+ int (*hash)(); /* returns the hash of a key */
+};
+/* A chained hash table. */
+struct st_table {
+ struct st_hash_type *type; /* key comparison and hashing callbacks */
+ int num_bins; /* length of bins[] */
+ int num_entries; /* number of stored entries */
+ struct st_table_entry **bins; /* one chain head per bin */
+};
+/* Membership test: a lookup that discards the record. */
+#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
+/* Values an st_foreach() callback returns to steer the iteration. */
+enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
+
+#ifndef _
+# define _(args) args
+#endif
+#ifndef ANYARGS
+# ifdef __cplusplus
+# define ANYARGS ...
+# else
+# define ANYARGS
+# endif
+#endif
+
+st_table *st_init_table _((struct st_hash_type *));
+st_table *st_init_table_with_size _((struct st_hash_type *, int));
+st_table *st_init_numtable _((void));
+st_table *st_init_numtable_with_size _((int));
+st_table *st_init_strtable _((void));
+st_table *st_init_strtable_with_size _((int));
+int st_delete _((st_table *, st_data_t *, st_data_t *));
+int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
+int st_insert _((st_table *, st_data_t, st_data_t));
+int st_lookup _((st_table *, st_data_t, st_data_t *));
+int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
+void st_add_direct _((st_table *, st_data_t, st_data_t));
+void st_free_table _((st_table *));
+void st_cleanup_safe _((st_table *, st_data_t));
+st_table *st_copy _((st_table *));
+/*
+ * Integer constants cast to function-pointer type; they are markers,
+ * not callable functions.  NOTE(review): nothing visible in this header
+ * uses them -- presumably placeholders for the numeric hash type.
+ */
+#define ST_NUMCMP ((int (*)()) 0)
+#define ST_NUMHASH ((int (*)()) -2)
+
+#define st_numcmp ST_NUMCMP
+#define st_numhash ST_NUMHASH
+
+#endif /* ST_INCLUDED */
diff --git a/ext/mbstring/oniguruma/win32/config.h b/ext/mbstring/oniguruma/win32/config.h
new file mode 100644
index 0000000..bdbdaf2
--- /dev/null
+++ b/ext/mbstring/oniguruma/win32/config.h
@@ -0,0 +1,84 @@
+#define STDC_HEADERS 1 /* fixed configuration values for the win32 build directory */
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_FLOAT_H 1
+#define HAVE_OFF_T 1
+#define SIZEOF_INT 4
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 0 /* 0: presumably "type not available" -- TODO confirm */
+#define SIZEOF___INT64 8
+#define SIZEOF_OFF_T 4
+#define SIZEOF_VOIDP 4 /* pointer size is hard-coded to 4 bytes */
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+#define HAVE_PROTOTYPES 1
+#define TOKEN_PASTE(x,y) x##y
+#define HAVE_STDARG_PROTOTYPES 1
+#ifndef NORETURN /* keep a definition supplied by an earlier header */
+#if _MSC_VER > 1100 /* newer compilers get __declspec(noreturn) */
+#define NORETURN(x) __declspec(noreturn) x
+#else
+#define NORETURN(x) x /* otherwise the annotation expands to nothing */
+#endif
+#endif
+#define HAVE_DECL_SYS_NERR 1
+#define STDC_HEADERS 1 /* repeats the identical definition near the top of this file */
+#define HAVE_STDLIB_H 1 /* repeats the identical definition near the top of this file */
+#define HAVE_STRING_H 1 /* repeats the identical definition near the top of this file */
+#define HAVE_LIMITS_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_SYS_UTIME_H 1
+#define HAVE_MEMORY_H 1 /* repeats the identical definition near the top of this file */
+#define uid_t int
+#define gid_t int
+#define HAVE_STRUCT_STAT_ST_RDEV 1
+#define HAVE_ST_RDEV 1
+#define GETGROUPS_T int
+#define RETSIGTYPE void
+#define HAVE_ALLOCA 1
+#define HAVE_DUP2 1
+#define HAVE_MEMCMP 1
+#define HAVE_MEMMOVE 1
+#define HAVE_MKDIR 1
+#define HAVE_STRCASECMP 1
+#define HAVE_STRNCASECMP 1
+#define HAVE_STRERROR 1
+#define HAVE_STRFTIME 1
+#define HAVE_STRCHR 1
+#define HAVE_STRSTR 1
+#define HAVE_STRTOD 1
+#define HAVE_STRTOL 1
+#define HAVE_STRTOUL 1
+#define HAVE_FLOCK 1
+#define HAVE_VSNPRINTF 1
+#define HAVE_FINITE 1
+#define HAVE_FMOD 1
+#define HAVE_FREXP 1
+#define HAVE_HYPOT 1
+#define HAVE_MODF 1
+#define HAVE_WAITPID 1
+#define HAVE_CHSIZE 1
+#define HAVE_TIMES 1
+#define HAVE__SETJMP 1
+#define HAVE_TELLDIR 1
+#define HAVE_SEEKDIR 1
+#define HAVE_MKTIME 1
+#define HAVE_COSH 1
+#define HAVE_SINH 1
+#define HAVE_TANH 1
+#define HAVE_EXECVE 1
+#define HAVE_TZNAME 1
+#define HAVE_DAYLIGHT 1
+#define SETPGRP_VOID 1
+#define inline __inline /* maps the keyword to the compiler-specific spelling */
+#define NEED_IO_SEEK_BETWEEN_RW 1
+#define RSHIFT(x,y) ((x)>>(int)(y)) /* y parenthesized so the cast covers the whole argument expression */
+#define FILE_COUNT _cnt /* presumably member names of the C runtime's FILE struct -- TODO confirm */
+#define FILE_READPTR _ptr
+#define DEFAULT_KCODE KCODE_NONE
+#define DLEXT ".so" /* NOTE(review): ".so" in a win32 config; DLEXT2 carries ".dll" */
+#define DLEXT2 ".dll"