diff options
Diffstat (limited to 'ext/mbstring/oniguruma')
66 files changed, 40802 insertions, 0 deletions
diff --git a/ext/mbstring/oniguruma/AUTHORS b/ext/mbstring/oniguruma/AUTHORS new file mode 100644 index 0000000..93167bd --- /dev/null +++ b/ext/mbstring/oniguruma/AUTHORS @@ -0,0 +1 @@ +sndgk393 AT ybb DOT ne DOT jp (K.Kosako) diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING new file mode 100644 index 0000000..4d321bb --- /dev/null +++ b/ext/mbstring/oniguruma/COPYING @@ -0,0 +1,32 @@ +Oniguruma LICENSE +----------------- + +When this software is partly used or it is distributed with Ruby, +this of Ruby follows the license of Ruby. +It follows the BSD license in the case of the one except for it. + +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY new file mode 100644 index 0000000..a1debef --- /dev/null +++ b/ext/mbstring/oniguruma/HISTORY @@ -0,0 +1,1838 @@ +History + +2007/08/16: Version 4.7.1 + +2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/07/04: [spec] (thanks K.Takata) + ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl) +2007/07/04: [dist] (thanks K.Takata) + fix documents API and API.ja. + +2007/06/18: Version 4.7.0 + +2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/06/18: [bug] (thanks KUBO Takehiro) + WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint). +2007/06/05: [impl] add #ifndef vsnprintf in regint.h. +2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case + in onig_search(). + +2007/04/12: Version 4.6.2 + +2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000 + to 0x4000. +2007/03/26: [impl] add 'void' to function declarations. + +2007/03/06: Version 4.6.1 + +2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/03/06: [bug] add #include <malloc.h> for bcc32. + (In bcc32, alloca() is declared in malloc.h.) +2007/03/06: [impl] remove including version.h of Ruby. +2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search(). + ex. /\n\Z/.match("aaaaaaaaaa\n") +2007/03/02: [impl] move range > start check position in end_buf process. 
+ +2007/02/08: Version 4.6.0 + +2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/01/09: [tune] select_opt_exact_info() didn't work for empty info. + ex. /.a/ make MAP info instead of EXACT info. +2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode. +2006/12/22: [spec] should check too short multibyte char in parse_exp(). + add USE_PAD_TO_SHORT_BYTE_CHAR. + ex. /\x00/ in UTF16 should be error. + +2006/11/17: Version 4.5.1 + +2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/15: [impl] remove CHECK_INTERRUPT. +2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e + should be [:punct:]. +2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER. +2006/11/07: [bug] (thanks Byte) + add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R. + +2006/11/06: Version 4.5.0 + +2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND. +2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of + the string range. + add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. + +2006/10/30: Version 4.4.6 + +2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/30: [impl] (thanks K.Takata) + add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END. +2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner) + invalid offset value was used in STATE_CHECK_BUFF_INIT(). + +2006/10/24: Version 4.4.5 + +2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/24: [impl] escape -Wall warning. +2006/10/24: [tune] (thanks Kornelius Kalnbach) + String#scan for long string needs long time compare with + old Ruby + by initialization time for combination explosion check + ex. ("test " * 100_000).scan(/\w*\s?/) + change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000. + reduce initialization area of state_check_buff. +2006/10/16: [bug] (thanks Akinori Musha) + first argument of rb_warn() should be format string. 
+2006/10/10: [impl] add msa.state_check_buff_size initialization + in onig_search(). +2006/10/10: [bug] should call onig_st_free_table() in + onig_free_shared_cclass_table(). +2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. +2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT(). + make valgrind happy. +2006/09/22: [impl] convert to ascii for parameter string in + onig_error_code_to_str(). + add enc member into OnigErrorInfo. + +2006/09/19: Version 4.4.4 + +2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/19: [impl] (thanks KOYAMA Tetsuji) + HAVE_STDARG_PROTOTYPES was not defined in Mac OS X + by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc... + +2006/09/15: Version 4.4.3 + +2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/15: [bug] (thanks Allan Odgaard) + out of range access in bm_search_notrev(). + (p < s) + +2006/09/08: Version 4.4.2 + +2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/08: [bug] (thanks K.Takata) + out of range access in bm_search_notrev(). +2006/09/04: [spec] (thanks K.Takata) + allow look-behind in negative look-behind. + ex. /(?<!(?<=a)b|c)d/ + +2006/08/29: Version 4.4.1 + +2006/08/29: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/08/29: [dist] (thanks Seiji Masugata) + add configure option --enable-combination-explosion-check + +2006/08/25: Version 4.4.0 + +2006/08/25: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/08/25: [impl] add_state_check_num() should be enclosed in + ifdef USE_COMBINATION_EXPLOSION_CHECK. +2006/08/23: [spec] config USE_COMBINATION_EXPLOSION_CHECK is enabled + in Ruby mode only. +2006/08/22: [impl] remove last line comma in enum OpCode. +2006/08/22: [impl] remove OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT and + OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT. +2006/08/22: [impl] remove OP_BACKREF3. 
+ +2006/08/21: Version 4.3.1 + +2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/21: [impl] change stack type values + and re-define STK_MASK_TO_VOID_TARGET etc... +2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite. +2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE. +2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} +2006/09/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} + if backreference is not used. +2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern + in combination explosion check. + +2006/08/17: Version 4.3.0 + +2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK. + check /(.+)*/, /(\s*foo\s*)*/ etc... + [API] add num_comb_exp_check member in regex_t. + [dist] change LTVERSION value to "1:0:0" in configure.in. +2006/08/15: [bug] OP_REPEAT_INC process in match_at(). + should check repeat-count >= range-upper and + range-upper may be infinite. + +2006/08/11: Version 4.2.3 + +2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/10: [impl] remove double call in set_qualifier(). +2006/08/10: [impl] remove by_number member in QualifierNode. +2006/08/09: [impl] remove a comma at the end of enum ReduceType + for escape warning on Mac OS X. +2006/08/07: [impl] remove warning in regcomp.c. +2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY. + +2006/08/03: Version 4.2.2 + +2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/03: [bug] (thanks Hiroyuki Yamamoto) + segmentation fault in regexec(). (POSIX API) +2006/08/02: [bug] combination of \G in look-ahead/look-behind and other + anchors(\A, \z, \Z) cause invalid result. + ex. /(?!\G)a\z/.match("ba") + start arg. of MATCH_ARG_INIT() should be original + arg. of onig_search(). 
+ +2006/07/31: Version 4.2.1 + +2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/07/31: [bug] (thanks Kimura Minoru) + re-implement bm_search_notrev(). +2006/07/31: [impl] bm_search_notrev() refactoring. +2006/07/31: [bug] (thanks Kimura Minoru) + fix incomplete multibyte string in exact info. +2006/07/31: [impl] (thanks Seiji Masugata) + remove cast in va_init_list() for Intel C Compiler. + +2006/07/18: Version 4.2.0 + +2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/18: [new] (thanks Wolfgang Nadasi-Donner) + add back reference with nest level. + \k<name+n>, \k<name-n> +2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX + and ONIG_SYN_XXX number literals. + +2006/07/03: Version 4.1.2 + +2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner) + allow \G in look-behind. + add ANCHOR_BEGIN_POSITION flag in setup_tree(). +2006/06/12: [impl] (thanks matz) + fix cast from char* to const char* + in onig_snprintf_with_pattern(). + fix cast from char* to const char* + for PopularQStr[] and ReduceQStr[]. + +2006/05/22: Version 4.1.1 + +2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/22: [impl] add position string argument to STACK_BASE_CHECK(). +2006/05/22: [bug] (thanks NARUSE, Yui) + add STK_NULL_CHECK_END to IS_TO_VOID_TARGET(). + ex. core dump in + /(?<pare>\(([^\(\)]++|\g<pare>)*+\))/.match('((a))') + +2006/05/15: Version 4.1.0 + +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/15: [impl] thread atomic changes for onig_end() and + onig_free_node_list(). +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2005/05/15: [dist] update API, API.ja, FAQ, FAQ.ja. +2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe() + and re_recompile_pattern(). + add config USE_RECOMPILE_API. 
+2006/05/15: [impl] improved thread safe implementation of onig_search() + and onig_match(). + +2006/05/11: Version 4.0.4 + +2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/11: [bug] (thanks Yuji Kaneda) + dead-lock in onig_end(). +2006/05/11: [dist] update index.html. + +2006/05/08: Version 4.0.3 + +2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/08: [bug] (thanks Allan Odgaard) + Segmentation fault in backward search. + ex. /^\t.*$/ +2006/04/18: [dist] update index.html. +2006/04/05: [dist] update index.html. +2006/03/24: [dist] update doc/RE, doc/RE.ja. + +2006/03/23: Version 4.0.2 + +2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP + and ONIG_OPTION_CAPTURE_GROUP check. +2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS. +2006/03/22: [impl] remove USE_NAMED_GROUP condition from + ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect(). +2006/03/22: [new] add API onig_noname_group_capture_is_active(). +2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType. + add typedef OnigRegexType regex_t + unless ONIG_ESCAPE_REGEX_T_COLLISION is defined. +2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000 + to 10000. (for docdiff program) +2006/02/17: [dist] change COPYING year 2005 -> 2006. + +2006/02/07: Version 4.0.1 + +2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table(). +2006/02/03: [ruby] add -m 0644 option to install command in "make 19". +2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML. + change from IS_POSIXLINE() to IS_MULTILINE() + for ANCHOR_ANYCHAR_START/_ML decision + in optimize_node_left(). +2006/01/26: [dist] update index.html for Oniguruma 2.5.3. +2006/01/25: [dist] update URL in index.html. 
+ +2006/01/24: Version 4.0.0 + +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin]. +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/01/24: [dist] remove warnings from sample/encode.c. +2006/01/24: [dist] change install description in README(.ja). +2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS. +2006/01/24: [dist] --- support shared library --- + use GNU libtool/automake. + change configure.in and add Makefile.am, sample/Makefile.am. + add AUTHORS file. +2006/01/24: [dist] test programs return exit code -1 when test fails. +2006/01/24: [bug] (thanks KIMURA Koichi) + invalid syntax definition in ONIG_SYNTAX_GREP. + ONIG_SYN_OP_BRACE_INTERVAL + -> ONIG_SYN_OP_ESC_BRACE_INTERVAL +2006/01/23: [dist] fix configure.in for onig-config. +2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS. + (U+000d, U+0085, U+2028, U+2029) +2005/12/29: [dist] change pmatch array size to 25 in testconv.rb. +2005/12/26: [dist] fix name in test.rb. +2005/12/26: [dist] update index.html for 2.5.1. + +2005/11/29: Version 3.9.1 + +2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/21: [bug] (thanks Allan Odgaard) + utf-8 character comments in extended mode leads + invalid result. + ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) / + fix onigenc_unicode_is_code_ctype() and + utf8_is_code_ctype(). +2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe) + begin-line anchor and BM search optimization leads + invalid result in UTF-16/32. + fix in set_optimize_exact_info(). + +2005/11/20: Version 3.9.0 + +2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin]. +2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR. + (!!! NO SUPPORT experimental option !!!) 
+2005/11/15: [bug] (thanks Allan Odgaard) + tok->escape was not cleared in fetch_token_in_cc(). + ex. [\s&&[^\n]] makes wrong result. +2005/10/18: [impl] (thanks nobu) + change sjis_mbc_enc_len() + and node_new_cclass_by_codepoint_range() scope to static. +2005/09/05: [dist] remove link to MultiFind. +2005/09/01: [dist] add link to yagrep. + +2005/08/23: Version 3.8.9 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [inst] fix Makefile.in for make ctest/ptest. + +2005/08/23: Version 3.8.8 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc(). +2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at(). +2005/08/23: [impl] (thanks akr) + add ONIG_OPTION_MAXBIT for escape conflict with + Ruby's option. +2005/08/22: [impl] escape GCC 4.0 warnings for testc.c. +2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840] + UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node(). + abort on /\S*/ =~ "\xfe" +2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c. +2005/08/22: [impl] fix testconvu.rb. +2005/08/22: [impl] escape GCC 4.0 warnings. + +2005/08/09: Version 3.8.7 + +2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/09: [bug] (thanks Allan Odgaard) + should not call enc_len() for s == range + in onig_search(). +2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install. + +2005/07/27: Version 3.8.6 + +2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux]. +2005/07/27: [impl] update onig-config.in. +2005/07/26: [new] (thanks Yen-Ju Chen) + add Oniguruma configuration check program. + (onig-config.in) + +2005/07/14: Version 3.8.5 + +2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux]. +2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux]. +2005/07/11: [bug] (thanks nobu) [ruby-dev:26505] + invalid handling for /\c\x/ and /\C-\x/. 
+ fix fetch_escaped_value(). +2005/07/05: [impl] (thanks Alexey Zakhlestine) + escape GCC 4.0 warnings. + +2005/07/01: Version 3.8.4 + +2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux]. +2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/30: [dist] add GB 18030 test to sample/encode.c. +2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head(). +2005/06/30: [new] (contributed by KUBO Takehiro) + add new character encoding ONIG_ENCODING_GB18030. +2005/06/30: [bug] invalid ctype check for multibyte encodings. + ("graph", "print") + fix onigenc_mb2/4_is_code_ctype(), + eucjp_is_code_ctype() and sjis_is_code_ctype(). +2005/06/30: [bug] invalid conversion from code point to mbc in + onigenc_mb4_code_to_mbc(). + +2005/06/28: Version 3.8.3 + +2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner) + invalid check for never ending recursion. + lower zero quantifier should be treated as + a non-recursive call alternative. + ex. /(?<bal>[^()]*(\(\g<bal>\)[^()]*)*)/ +2005/06/15: [impl] add divide_ambig_string_node_sub(). +2005/06/15: [dist] add a test to sample/encode.c. +2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group) + +2005/06/01: Version 3.8.2 + +2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja. +2005/05/31: [impl] minor change in node_new(). +2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/30: [bug] (thanks Allan Odgaard) + FreeNodeList null check should be on thread-atomic + in node_new(). + +2005/05/11: Version 3.8.1 + +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32]. +2005/05/11: [dist] update win32/Makefile (make 19). +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux]. 
+2005/05/06: [impl] (thanks nobu) [ruby-core:4815] + add #ifdef USE_VARIABLE_META_CHARS to goto label. +2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux]. +2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION + to onig_rb_warn() and onig_rb_warning(). + +2005/04/15: Version 3.8.0 + +2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux]. +2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/04/01: [impl] (thanks Joe Orton) + (thanks Moriyoshi Koizumi) + many const-ification to many *.[ch] files. + +2005/03/25: Version 3.7.2 + +2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/23: [new] add ONIG_SYNTAX_ASIS. +2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE. +2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API) +2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/08: [impl] (thanks matz) [ruby-dev:25783] + should not allocate memory for key data in st.c. + move st_*_strend() functions from st.c. fixed some + potential memory leaks. + (imported from Ruby 1.9 2005-03-08) + +2005/03/07: Version 3.7.1 + +2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux]. +2005/03/07: [impl] (thanks Rui Hirokawa) + add ONIG_ESCAPE_UCHAR_COLLISION. + rename UChar to OnigUChar in oniguruma.h. +2005/03/07: [impl] remove declarations for Ruby in oniggnu.h. +2005/03/05: [bug] ANCHOR_ANYCHAR_STAR didn't work in onig_search(). +2005/03/01: [dist] remove oniggnu.h from MANIFEST-RUBY. + remove oniggnu.h from make 19. +2005/03/01: [bug] (thanks matz) [ruby-dev:25778] + uninitialized member (OptEnv.backrefed_status) + was used. + +2005/02/19: Version 3.7.0 + +2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. +2005/02/19: [new] (thanks Minero Aoki) + add onig_region_set(). 
+2005/02/19: [API] change onig_region_init() to extern. +2005/02/19: [dist] remove reggnu.c from MANIFEST-RUBY. + remove reggnu.c from make 19. +2005/02/19: [dist] update doc/API and doc/API.ja. +2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. +2005/02/19: [impl] (thanks Alexey Zakhlestine) + change UChar* to const UChar* in oniguruma.h, + regenc.h and regparse.h. +2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and + onigposix.h and st.h. +2005/02/12: [test] success in ruby 1.9.0 (2005-02-11) [i386-cygwin]. +2005/02/12: [bug] (thanks nobu) [ruby-dev:25676] + type_cclass_hash() fix overrun. +2005/02/09: [test] success in ruby 1.9.0 (2005-02-09) [i686-linux]. +2005/02/09: [spec] add RE_OPTION_FIND_NOT_EMPTY etc.. to oniggnu.h. +2005/02/09: [dist] remove hash.c.patch. +2005/02/07: [impl] remove re_mbctab, mbctab_ascii etc... + (USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY) + +2005/02/04: Version 3.6.0 + +2005/02/04: [test] success in ruby 1.9.0 (2005-02-04) [i686-linux]. +2005/02/01: [bug] add key_free() call to st_free_table(). +2005/02/01: [new] add onig_get_default_ambig_flag() and + onig_set_default_ambig_flag(). +2005/02/01: [dist] update MANIFEST-RUBY. +2005/01/31: [test] success in ruby 1.9.0 (2005-01-29) [i686-linux]. +2005/01/31: [spec] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND + from ONIGENC_AMBIGUOUS_MATCH_DEFAULT. +2005/01/31: [dist] update Makefile.in (make 19). +2005/01/29: [memo] (thanks Kazuo Saito) + Oniguruma 3.5.4 was merged to Ruby 1.9.0. +2005/01/28: [impl] (thanks UK-taniyama) + add extern "C" { } directive to oniguruma.h, oniggnu.h + and onigposix.h for C++. +2005/01/25: [impl] remove nested function call for xxx_code_to_mbclen(). + (euc_kr.c, euc_tw.c, big5.c) + +2005/01/19: Version 3.5.4 + +2005/01/19: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/19: [bug] (thanks Isao Sonobe) + callback function argument name_end of onig_foreach_name() + was wrong. 
+ name key of name table should be null terminated for + character encoding length. + add strdup_with_null(), rename onig_strdup() to k_strdup(). + use e->name_len in i_names(). +2005/01/17: [impl] (thanks UK-taniyama) + add HAVE_SYS_TYPES_H to config.h.in. + +2005/01/13: Version 3.5.3 + +2005/01/13: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/13: [bug] ignore case match bug. + ex. /s+/iu.match("SSSSS") ==> [4..5] + fix OP_EXACT1_IC, OP_EXACTN_IC process. +2005/01/13: [bug] (thanks Isao Sonobe) + ignore case match bug. + ex. /is/iu.match("ss") fail. + fix str_lower_case_match() etc. + +2005/01/05: Version 3.5.2 + +2005/01/05: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/05: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux]. +2005/01/05: [bug] (thanks Isao Sonobe) + ignore case match bug. + ex. /s+/iu.match("sssss") ==> [4..5] + fix OP_EXACT1_IC, OP_EXACTN_IC process. +2005/01/05: [bug] (thanks Isao Sonobe) + group name table should be renumbered. + add onig_renumber_name_table(). +2004/12/24: [dist] remove file onigcmpt200.h. + +2004/12/17: Version 3.5.1 + +2004/12/17: [dist] add INSTALL-RUBY to archive. +2004/12/16: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux]. +2004/12/16: [dist] update hash.c.patch. +2004/12/15: [bug] (thanks matz) + char > 127 should be casted to unsigned char. (utf8.c) +2004/12/13: [impl] add HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES definition + to oniguruma.h in the case __cplusplus. +2004/12/06: [dist] update doc/RE and doc/RE.ja. +2004/12/03: [impl] (thanks nobu) + st.h fix prototype for C++. + +2004/12/03: Version 3.5.0 + +2004/12/02: [test] success in ruby 1.9.0 (2004-12-02) [i686-linux]. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-mswin32]. +2004/12/01: [dist] add make targets 19 and 19up to win32/Makefile. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-cygwin]. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i686-linux]. 
+2004/12/01: [impl] double cast for escape warning in Cygwin. + (HashDataType* )((void* )(&e)) in regparse.c +2004/12/01: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux]. +2004/12/01: [tune] change implementation of clear_opt_map_info(). + (which was 10-16% cost in gprof result for my test program) +2004/12/01: [dist] remove regex.c from distribution files. +2004/11/30: [memo] remove targets 16 and 18 from Makefile.in. +2004/11/30: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux]. +2004/11/30: [inst] add "cp -p st.[ch] st.[ch].ruby_orig" to "make 19". +2004/11/30: [tune] map_position_value() return 20 if code is 0 + and minimum enclen > 1. +2004/11/30: [test] success in ruby 1.9.0 (2004-11-29) [i686-linux]. +2004/11/30: [impl] minor changes for multi-thread in regexec.c and regcomp.c. +2004/11/30: [impl] change THREAD_PASS_LIMIT_COUNT value from 10 to 8. +2004/11/30: [impl] add THREAD_ATOMIC_XXX to FreeNodeList access in regparse.c +2004/11/29: [impl] add USE_MULTI_THREAD_SYSTEM. +2004/11/29: [memo] add hash.c.patch to CVS. +2004/11/29: [dist] change mail address to 'sndgk393 AT ...' +2004/11/29: [dist] add -s option (silent mode) to test.rb. +2004/11/29: [tune] change THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS value + from 20 to 8. +2004/11/29: [inst] add make target "19up". +2004/11/29: [dist] change Oniguruma Home Page URL. +2004/11/29: [impl] remove onig_is_in_code_range_array(). +2004/11/29: [dist] fix doc/RE and RE.ja (character types). +2004/11/26: [dist] fix win32/Makefile. +2004/11/26: [dist] fix doc/RE and RE.ja (multibyte character types). +2004/11/26: [impl] add onig_free_shared_cclass_table(). +2004/11/26: [impl] move definition USE_UNICODE_FULL_RANGE_CTYPE to regenc.h. +2004/11/26: [impl] add opcode OP_CCLASS_NODE. +2004/11/26: [impl] move definition of CClassNode to regint.h. +2004/11/26: [impl] add type PointerType in regint.h. +2004/11/25: [impl] remove ONIGENC_CTYPE_MOD_NOT. 
+2004/11/25: [impl] rename onig_node_new_cclass_by_codepoint_range to + node_new_cclass_by_codepoint_range. +2004/11/25: [impl] remove get_type_cc_node method from OnigEncodingType. +2004/11/25: [impl] move implementation of shared char-class from enc/*.c + to regparse.c. +2004/11/25: [dist] add hash.c.patch for Ruby 1.9 hash.c change. +2004/11/22: [impl] change utf8_get_type_node(). +2004/11/22: [impl] add ONIGENC_CTYPE_MOD_NOT. +2004/11/22: [bug] (thanks MIYAMUKO Katsuyuki) + ruby make test fail in HP-UX B.11.23 ia64. + should use tok->u.code instead of tok->u.c in + the case of TK_CODE_POINT. +2004/11/19: [bug] (thanks Yoshida Masato) + invalid multibyte code causes segmentation fault. + ex. /[\xFF-\xFF]/u +2004/11/19: [bug] (thanks Yoshida Masato) + illegal check in char-class range in UTF-8. + ex. s = "[\xC2\xA0-\xC3\xBE]" + p(Regexp.new(s, nil, "u") =~ "\xC3\xBE") +2004/11/18: [impl] add onig_node_new_cclass_by_codepoint_range(). +2004/11/18: [impl] remove OnigCodePointRange type. (use OnigCodePoint[].) +2004/11/17: [bug] (thanks nobu) + abort in "a".gsub(/a\Z/, "") + fix ONIGENC_STEP_BACK() argument in onig_search(). +2004/11/16: [impl] add key2 member to st_table_entry in st.[ch]. + change API of st for non-null terminated string key. +2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType. +2004/11/15: [impl] add st.h and st.c from Ruby 1.9. + use st-hash always. +2004/11/12: [impl] change member 'not' of CClassNode to 'flags'. + add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE. +2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c. +2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c. +2004/11/11: [bug] fix enc/utf8.c. + size 0 array initializer was compile error in VC++. +2004/11/09: [inst] (thanks Hiroki YAGITA) + change installed file mode to 0644. +2004/11/09: [bug] (thanks UK-taniyama) + wrong definitions GET_RELADDR_INC(), GET_ABSADDR_INC() + etc...
(NOT PLATFORM_UNALIGNED_WORD_ACCESS) +2004/11/09: [impl] type cast in regexec() for remove compile time warning. + (WIN32, regposix.c) +2004/11/08: [spec] fix Unicode character types. + 0x00ad (soft hyphen) should be [:cntrl:] and [:space:] type. + [0x0009..0x000d], 0x0085 should be [:print:] type. + 0x00ad should not be [:punct:] type. +2004/11/08: [inst] fix Makefile.in. (for make ctest/ptest/testcu) +2004/11/06: [impl] (thanks Kazuo Saito) + too many alternatives pattern causes core dump. + change implementation of onig_node_free(). +2004/11/05: [spec] rename ONIGERR_END_PATTERN_AT_BACKSLASH to + ONIGERR_END_PATTERN_AT_ESCAPE. +2004/11/05: [impl] (thanks matz) + escape compile time warnings for x86-64 Linux. + StackIndex type int -> long +2004/11/05: [memo] (thanks Kazuo Saito) + Oniguruma 3.4.0 was merged to Ruby 1.9.0. + +2004/10/30: Version 3.4.0 + +2004/10/30: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux]. +2004/10/30: [new] add hexadecimal digit char type. (\h, \H) + syntax: ONIG_SYN_OP2_ESC_H_XDIGIT +2004/10/30: [bug] (thanks Guy Decoux) + reluctant infinite repeat bug. + ex. /^[a-z]{2,}?$/.match("aaa") fail. + fix OP_REPEAT_INC_NG process in match_at(). + +2004/10/18: Version 3.3.1 + +2004/10/18: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux]. +2004/10/18: [impl] (thanks Imai Yasumasa) + enclose #include <sys/types.h> by #ifndef __BORLANDC__. +2004/10/18: [bug] (thanks Imai Yasumasa) + memory access violation in select_opt_exact_info(). +2004/09/25: [dist] fix doc/API and doc/API.ja. +2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for + the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + is not defined. + +2004/09/17: Version 3.3.0 + +2004/09/17: [dist] add COPYING to program source files. +2004/09/17: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/09/17: [bug] (thanks Isao Sonobe) + memory access violations in xxx_mbc_enc_len(), + and xxx_mbc_to_normalize() and + xxx_left_adjust_char_head().
+ add string range check in match_at() and onig_search(). +2004/09/08: [dist] change mail address format.(kosako AT sofnec ...) + +2004/09/04: Version 3.2.9 + +2004/09/04: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/09/04: [bug] (thanks Bob Kerstetter and Richard Koch) + search fail in ignore case mode. + fix str_lower_case_match(). +2004/09/04: [inst] (thanks Isao Sonobe) + clear sample directory in 'make clean'. +2004/09/04: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII + meanings in XXXXX_mbc_to_normalize() and + XXXXX_is_mbc_ambiguous(). +2004/08/28: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII + meanings in iso_8859_XX_mbc_to_normalize() and + iso_8859_XX_is_mbc_ambiguous(). + +2004/08/24: Version 3.2.8 + +2004/08/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/08/24: [spec] add ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY. + /a{n}?/ == /(?:a{n})?/ +2004/08/24: [dist] fix doc/RE and doc/RE.ja. +2004/08/24: [bug] (thanks starfish) + memory leak in set_optimize_exact_info(). + +2004/08/21: Version 3.2.7 + +2004/08/21: [test] success in ruby 1.8.2 (2004-07-28) [i686-linux]. + (1.8.2 preview2) +2004/08/21: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/08/21: [bug] (thanks Isao Sonobe) (thanks kage) + memory access violation in bm_search_notrev(). + (forgotten to merge from 2.X) + +2004/07/24: Version 3.2.6 + +2004/07/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/07/24: [test] success in ruby 1.8.2 (2004-07-16) [i686-linux]. +2004/07/24: [bug] fix warnings for regexec.c. (gcc 2.91.66) +2004/07/24: [memo] change version control system from Subversion + to CVS 1.11.17. +2004/07/20: [bug] (thanks Isao Sonobe) + illegal result in negative character class in ignore case + mode. fix pair-ambig-codes process in parse_exp(). + ex. /[^a]/i.match("A") +2004/07/20: [bug] (thanks Isao Sonobe) + undefined bytecode error happens in UTF-16BE etc.. 
+ compile_length_cclass_node() was not consistent with + compile_cclass_node(). + +2004/07/01: Version 3.2.5 + +2004/07/01: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/07/01: [new] add onig_get_syntax_{op,op2,behavior,options}. +2004/07/01: [bug] (thanks Isao Sonobe) + invalid result in onig_capture_tree_traverse(). + fix make_capture_history_tree(). + +2004/06/29: Version 3.2.4 + +2004/06/29: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/29: [new] (thanks Isao Sonobe) + add onig_number_of_captures(). + +2004/06/25: Version 3.2.3 + +2004/06/25: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/25: [bug] (thanks Isao Sonobe) + invalid result in onig_capture_tree_traverse(). + fix make_capture_history_tree(). + +2004/06/24: Version 3.2.2 + +2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/06/24: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/24: [new] (thanks Isao Sonobe) + add onig_number_of_capture_histories(). +2004/06/24: [bug] (thanks Isao Sonobe) + invalid char position match in UTF-16 and UTF-32. + add onigenc_always_false_is_allowed_reverse_match(). + +2004/06/17: Version 3.2.1 + +2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/06/17: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/06/17: [impl] should not use OP_REPEAT for (...)? even if target size + is long. +2004/06/17: [bug] (thanks nobu) [ruby-dev:23703] + should use STACK_AT() instead of stkp in OP_REPEAT_INC. + add IN_VAR_REPEAT flag in setup_tree(). +2004/06/16: [impl] change select_opt_exact_info() to use ByteValTable[]. +2004/06/16: [impl] change map_position_value() table values. 
+2004/06/14: [impl] (thanks John Carter) + RelAddrType, AbsAddrType and LengthType change + from short int to int type for the very long string match. +2004/06/14: [bug] (thanks Greg A. Woods) + fix nmatch argument of regexec() is smaller than + reg->num_mem + 1 case. (POSIX API) +2004/06/14: [spec] (thanks Greg A. Woods) + set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API) + +2004/06/10: Version 3.2.0 + +2004/06/10: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/10: [test] success in ruby 1.9.0 (2004-05-27) [i386-mswin32]. +2004/06/10: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/06/10: [dist] add README.ja. +2004/06/10: [new] add onig_copy_encoding(). +2004/06/10: [API] add encoding argument to onig_set_meta_char(). + add meta_char_table member to OnigEncodingType. +2004/06/08: [dist] add doc/API.ja. +2004/06/07: [API] add num_of_elements member to OnigCompileInfo. +2004/05/29: [memo] (thanks Kazuo Saito) + Oniguruma 3.1.0 was merged to Ruby 1.9.0. +2004/05/26: [impl] rename NST_SIMPLE_REPEAT to NST_STOP_BT_SIMPLE_REPEAT. +2004/05/26: [impl] doesn't need to check that target's simple repeat-ness + for EFFECT_MEMORY type node in setup_tree(). + +2004/05/25: Version 3.1.0 + +2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/05/25: [test] success in ruby 1.9.0 (2004-05-23) [i686-linux]. +2004/05/25: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/05/25: [bug] (thanks Masahiro Sakai) [ruby-dev:23560] + ruby -ruri -ve 'URI::ABS_URI =~ + "http://example.org/Andr\xC3\xA9"' + nested STK_REPEAT type stack can't backtrack repeat_stk[]. + add OP_REPEAT_INC_SG and OP_REPEAT_INC_NG_SG. +2004/05/25: [new] support UTF-32LE. (ONIG_ENCODING_UTF32_LE) +2004/05/25: [new] support UTF-32BE. (ONIG_ENCODING_UTF32_BE) +2004/05/24: [impl] divide enc/utf16.c to utf16_be.c and utf16_le.c. +2004/05/24: [impl] add enc/unicode.c. 
+2004/05/24: [API] change calling sequences of onig_new_deluxe() and + onig_recompile_deluxe(). + define OnigCompileInfo type. +2004/05/21: [impl] perform ensure process for rb_trap_exec() in match_at(). + add onig_exec_trap() and CHECK_INTERRUPT_IN_MATCH_AT. +2004/05/21: [impl] add regex status check to onig_match(). +2004/05/21: [new] add onig_get_capture_tree() and + onig_capture_tree_traverse(). +2004/05/20: [spec] (thanks Isao Sonobe) + capture history return capture data tree. + (see sample/listcap.c) +2004/05/19: [bug] (thanks Simon Strandgaard) + Control-C does not work in matching process on Ruby. + add calling of CHECK_INTERRUPT into match_at(). + ex. /<(?:[^">]+|"[^"]*")+>/.match('<META http-equiv= \ + "Content-Type content="text/html; charset=iso-8859-1">') +2004/05/19: [bug] (thanks Simon Strandgaard) + define virtual codepoint values for invalid encoding + byte 0xfe and 0xff in UTF-8. + ex. /\w+/u.match("%a\xffb\xfec%") ==> "a" +2004/05/19: [spec] (thanks Simon Strandgaard) + too big backref number should be treated as a sequence of + an octal char and number digits. + ex. /b\3777\c/.match("b\3777\c") +2004/05/17: [spec] rename encoding names "UTF-16 BE" and "UTF-16 LE" + to "UTF-16BE" and "UTF-16LE". +2004/05/17: [impl] move ismbchar() and mbclen() from oniguruma.h to oniggnu.h. +2004/05/17: [impl] rename onigenc_single_byte_is_allowed_reverse_match() to + onigenc_always_true_is_allowed_reverse_match(). + +2004/05/14: Version 3.0.0 + +2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/05/14: [test] success in ruby 1.9.0 (2004-05-14) [i686-linux]. +2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. + (* need to edit parse.y: + register int c; ---> int c; in yylex()) +2004/05/14: [impl] add regext.c. +2004/05/14: [spec] KOI8 is not included in library archive by default setup. +2004/05/14: [impl] implementation changes are completed for all encoding files. +2004/05/12: [impl] add divide_ambig_string_node(). 
+ ambiguous string is divided and normalized before + optimization and compilation process. +2004/05/11: [dist] remove INSTALL-RUBY from distribution. +2004/04/28: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.8 was merged to Ruby 1.9.0. +2004/04/26: [spec] change value DEFAULT_MATCH_STACK_LIMIT_SIZE = 0 : unlimited +2004/04/26: [new] add onig_get_match_stack_limit_size() and + onig_set_match_stack_limit_size(). +2004/04/26: [bug] add error check to re.c.181.patch and re.c.168.patch. +2004/04/23: [impl] remove ctype_support_level from OnigEncodingType. +2004/04/22: [spec] allow the range from single byte char to multibyte char in + character class for implementation reason. + ex. /[a-\xbb\xcc]/ in EUC-JP encoding. +2004/04/21: [impl] remove max_enc_len_by_first_byte() from OnigEncodingType. +2004/04/20: [new] add onig_copyright(). +2004/04/20: [impl] add regversion.c. +2004/04/15: [new] add onig_get_ambig_flag(). +2004/04/14: [bug] (thanks Isao Sonobe) + undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST + is set. + should finish matching process if find-condition + fails at OP_END in match_at(). +2004/04/12: [impl] add ambig_flag to regex_t. +2004/04/09: [impl] move onig_set_meta_char() to regsyntax.c. +2004/04/09: [bug] (thanks HIROSE Masaaki) fix onig_version(). +2004/04/08: [impl] add regsyntax.c. +2004/04/07: [new] support UTF-16 LE. (ONIG_ENCODING_UTF16_LE) +2004/04/05: [impl] add ONIGENC_CTYPE_NEWLINE. +2004/04/05: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.6 was merged to Ruby 1.9.0. +2004/04/02: [memo] Version 2.2.6 was released. +2004/03/26: [new] support UTF-16 BE. (ONIG_ENCODING_UTF16_BE) +2004/03/25: [spec] support non 8-bit encodings. +2004/03/16: [memo] 2.X branch for 8-bit encodings only. + +2004/03/16: Version 2.2.5 + +2004/03/16: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/03/16: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
+2004/03/16: [impl] add property name to error message of + ONIGERR_INVALID_CHAR_PROPERTY_NAME. +2004/03/16: [spec] allow prefix 'Is' for \p{...} in ONIG_SYNTAX_PERL. + add syntax op. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS. +2004/03/15: [dist] add sample/syntax.c. +2004/03/15: [spec] support NOT op. in char property. \p{^...}, \P{^...}. + add syntax op. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT. +2004/03/15: [spec] rename ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY to + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY. +2004/03/10: [impl] move ONIGERR_XXX from regenc.h to oniguruma.h, + rename ONIGERR_XXX to ONIGENCERR_XXX in regenc.h. +2004/03/08: [impl] (thanks eban) + replace defined(__CYGWIN__) to defined(__GNUC__). +2004/03/08: [bug] (thanks eban) [ruby-dev:23172] + need to separate initialization for bcc32. +2004/03/06: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.4 was merged to Ruby 1.9.0. +2004/03/05: [API] change second argument type of onig_set_meta_char() + from unsigned int to OnigCodePoint. +2004/03/05: [dist] (thanks Kazuo Saito) + add MANIFEST-RUBY. + +2004/03/04: Version 2.2.4 + +2004/03/04: [impl] (thanks Moriyoshi Koizumi) + fix many warnings in Win32 VC++ with /W3 option. + +2004/03/02: Version 2.2.3 + +2004/03/02: [bug] (thanks Isao Sonobe) + return invalid capture region value if capture history + is used. (OP_MEMORY_END_PUSH_REC bug) + ex. /\g<p>(?@<p>\(\g<s>\)){0}(?<s>(?:\g<p>)*|){0}/ + .match("((())())") +2004/03/02: [impl] (thanks Kazuo Saito) + add :nodoc: to onig_stat_print() for RDoc. +2004/03/02: [impl] don't use ONIG_SOURCE_IS_WRAPPED. + +2004/02/27: Version 2.2.2 + +2004/02/27: [impl] fix the position of onig_stat_print(). +2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h + for ignored by RDoc. + +2004/02/26: Version 2.2.1 + +2004/02/26: [bug] [bugs.php.net:#26677] (thanks behrens) + invalid definition at onig_error_code_to_str() + in the case of NOT HAVE_STDARG_PROTOTYPES. 
+ +2004/02/25: Version 2.2.0 + +2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux]. +2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() were used in + onigenc_is_code_ctype() in the case of Ruby M17N. +2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16) +2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c. +2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14) +2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13) +2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10) +2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig(). +2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9) +2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4, + ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R. +2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex. +2004/02/17: [spec] check capture status for empty infinite loop. + [ruby-dev:20224] etc... + ex. /(?:\1a|())*/.match("a"), + /(?:()|()|()|(x)|()|())*\2b\5/.match("b") + add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK. + add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH. + add stack type STK_NULL_CHECK_END. +2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c. +2004/02/13: [bug] (thanks Simon Strandgaard) + parsing of nested repeat was invalid. + ex. /ab{2,3}*/ was /(?:a(?:b{2,3}))*/, + should be /a(?:b{2,3}*)/ +2004/02/12: [bug] (thanks Simon Strandgaard) + OP_REPEAT_INC_NG process in match_at() is wrong. + ex. bad match /a.{0,2}?a/ =~ "0aXXXa0" +2004/02/12: [bug] (thanks Simon Strandgaard) + wrong fetch after (?x) option. ex. "(?x)\ta .\n+b" +2004/02/12: [bug] (thanks Simon Strandgaard) + [\^] is not an empty char class. +2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(), + onig_set_syntax_behavior(), onig_set_syntax_options(). +2004/02/06: [dist] add a new target 'site' to Makefile.in. +2004/02/06: [dist] add index.html.
+2004/02/03: [bug] oniggnu.h was not installed by 'make install'. + +2004/02/02: Version 2.1.0 + +2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11) +2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5) +2004/02/02: [impl] should check single byte encoding or not in and_cclass() + and or_cclass(). +2004/01/30: [dist] add oniggnu.h. +2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type. +2004/01/30: [new] support ISO-8859-8. (ONIG_ENCODING_ISO_8859_8) +2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7) +2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6) +2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R) +2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8) +2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c. +2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4) +2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3) +2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong. + (0x80 - 0xff is not ASCII) +2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2) +2004/01/23: [dist] add enc/isotable.c. +2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW) +2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and + ALIGNMENT_RIGHT() was wrong. + type casting should be unsigned int, not int. +2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__) + to unaligned word access condition. (AMD64 ?) +2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c. +2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR) +2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/01/20: [dist] change Makefile.in. +2004/01/20: [spec] add \p{...}, \P{...} in char class. +2004/01/20: [new] character property operators \p{...}, \P{...}. 
+ supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL. +2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/) +2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h. +2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch. +2004/01/16: [dist] update sample/*.c for new API. +2004/01/16: [dist] add onigcomp200.h. (for old API compatibility) +2004/01/16: [dist] update documents API, RE and RE.ja. +2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ onig_, + ENC_ -> ONIGENC, enc_ -> onigenc_. +2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD(). + rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to + ENC_CTYPE_SUPPORT_LEVEL_SB. +2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to + PLATFORM_UNALIGNED_WORD_ACCESS. +2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000. +2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h. + remove code_to_mbc_first member in RegCharEncodingType. +2004/01/13: [impl] remove head byte bitset information in cclass->mbuf. +2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for + escape conflict. + +2003/12/24: Version 2.0.0 + +2003/12/24: [spec] ignore case option is effective to numbered char. + ex. /\x61/i =~ "A" +2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux]. +2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2003/12/24: [test] success in regex.c compile test on ruby-m17n. + (but can't make miniruby because re.c patch fail.) +2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5. +2003/12/22: [spec] implement fold match on UTF-8 encoding. +2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to + RegCharEncoding type. +2003/12/19: [impl] add add_ctype_to_cc() in regparse.c. +2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case. +2003/12/19: [impl] change ENC_CODE_TO_MBC() interface. 
+2003/12/18: [new] implement fold match. (variable number of char + match in ignore case mode.) + ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss". +2003/12/17: [impl] refactoring of encoding system. +2003/12/17: [impl] add enc_init() in regenc.c. +2003/12/17: [new] support Big5. (REG_ENCODING_BIG5) +2003/12/16: [impl] change CodePoint from unsigned int to unsigned long. +2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15) +2003/12/16: [impl] change P_() macro definition condition for Win32. +2003/12/16: [dist] add sample/encode.c +2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1) +2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX. +2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding. +2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...) +2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern(). + change local var. type char* to UChar*. +2003/12/15: [impl] remove REG_MBLEN_TABLE[]. +2003/12/15: [spec] rename function prefix regex_get_prev_char_head(), + regex_get_left_adjust_char_head() and + regex_get_right_adjust_char_head() to enc_xxxxxx(). +2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_. +2003/12/12: [impl] remove USE_SBMB_CLASS. +2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len(). +2003/12/12: [impl] rename WCINT to CodePoint. +2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h. +2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256]. +2003/12/11: [impl] remove RegAsciiCaseAmbigTable. +2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison. +2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. *** +2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h. +2003/12/08: [impl] add IS_ENC_WORD() to regenc.h. +2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX(). +2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h. 
+2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF. +2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND | + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF) + to RegSyntaxGrep. +2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT. +2003/12/04: [spec] move declarations of regex_get_default_encoding() and + regex_set_default_encoding() from oniguruma.h to regenc.h. +2003/12/03: [new] add regex_get_default_encoding() and + regex_set_default_encoding(). +2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed. + (current default value, not initial default value.) +2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX. +2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"' +2003/12/02: [spec] move regex_set_default_trans_table() declaration + from oniguruma.h to regenc.h. (obsoleted API) +2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and + AmbiguityTable to regenc.c. +2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c. +2003/12/01: [dist] add regenc.h and regenc.c. +2003/11/18: [dist] change testconv.rb. +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in parse_subexp(). +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in names_clear() and parse_char_class(). +2003/11/17: [bug] memory leak in parse_char_class(). +2003/11/17: [bug] (thanks Masaru Tsuda) + OptExactInfo length should not over OPT_EXACT_MAXLEN. + (concat_opt_exact_info_str()) + +2003/11/12: Version 1.9.5 + +2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux]. +2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR. +2003/11/11: [dist] add a sample program sample/sql.c. +2003/11/11: [new] add variable meta character. + regex_set_meta_char() +2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS. 
+2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to + REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_QMARK_GROUP_EFFECT to + REG_SYN_OP2_QMARK_GROUP_EFFECT. +2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode. +2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT. +2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to + REG_SYN_WARN_CC_OP_NOT_ESCAPED. +2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func(). +2003/10/30: [new] add regex_name_to_backref_number(). + (for multiplex definition name, see sample/names.c) +2003/10/30: [spec] add name_end and reg argument to callback function of + regex_foreach_name(). (see sample/names.c) +2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME. + add error code REGERR_MULTIPLEX_DEFINED_NAME. +2003/10/14: [dist] modify sample/simple.c. +2003/10/03: [bug] (thanks nobu) [ruby-dev:21472] + sub-anchor of optimization map info was wrong + in concat_left_node_opt_info(). + ex. /^(x?y)/ = "xy" fail. + +2003/09/17: Version 1.9.4 + +2003/09/17: [spec] change specification of char-class range in ignore case mode + follows with Ruby 1.8(2003-09-17). + ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i + ==> H-Z, h-z, 0x5b-0x60, a-c, A-C +2003/09/16: [bug] (thanks Guy Decoux) + remove env->option == option check in parse_effect(). + change env->option for dynamic option in parse_exp(). + (ex. bad match /(?i)(?-i)a/ =~ "A") +2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to + REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC, + REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC. +2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR. 
+2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to + REG_SYN_OP2_ESC_CAPITAL_M_BAR_META, + REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP, + REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR, + REG_SYN_OP_ESC_GNU_BUF_ANCHOR to + REG_SYN_OP2_ESC_GNU_BUF_ANCHOR, + REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS, + REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD, + REG_SYN_OP_ESC_WORD_BEGIN_END to + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END, + REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND, + REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE, + REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT, + REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC, + REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP, + REG_SYN_CONTEXT_INDEP_OPS to + REG_SYN_CONTEXT_INDEP_REPEAT_OPS, + REG_SYN_CONTEXT_INVALID_REPEAT_OPS to + REG_SYN_CONTEXT_INVALID_REPEAT_OPS. + add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR. +2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR, + REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF, + REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF, + REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF, + REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE, + REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL, + REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL, + REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP, + REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT, + REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT, + REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY, + REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT, + REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to + REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL}, + REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL, + REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP + and REG_SYN_OP2_ESC_K_NAMED_BACKREF. 
+2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for + no-name group in noname_disable_map(). + ex. /(a+)*(?<name>...)/ +2003/09/02: [impl] include <stdio.h> is forgotten to erase in regcomp.c. +2003/09/01: [dist] update doc/RE and doc/RE.ja. +2003/08/26: [bug] (thanks Guy Decoux) + should not double free node at the case TK_CC_CC_OPEN + in parse_char_class(). + +2003/08/19: Version 1.9.3 + +2003/08/19: [inst] change re.c.180.patch. +2003/08/19: [impl] rename 'list of captures' to 'capture history'. +2003/08/19: [dist] add doc/RE.ja. (Japanese) +2003/08/19: [new] add regex_copy_syntax(). +2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to + REG_SYN_OP2_ATMARK_CAPTURE_HISTORY. +2003/08/18: [spec] (thanks nobu) + don't use IMPORT in oniguruma.h and onigposix.h. +2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb. +2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in. +2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1. +2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0. +2003/08/18: [bug] (thanks nobu) + single/multi-byte decision was wrong in parse_char_class(). + add regex_wc2mblen(). + should not set fetched to 1 in TK_RAW_BYTE case. +2003/08/18: [bug] should update BitSet in the case inc_n >= 0 + in add_wc_range_to_buf(). +2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c. +2003/08/11: [bug] should clear region->list in regex_region_resize(). + +2003/08/08: Version 1.9.2 + +2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000 + VC++ 6.0 and Cygwin. +2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform, + because definition is added in win32\win32.h. +2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux. +2003/08/08: [dist] change re.c.180.patch and re.c.168.patch. +2003/08/08: [new] (thanks akr) + implemented list of captures. (?@...), (?@<name>...) 
+2003/08/07: [dist] add sample/listcap.c. +2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at(). + renewal of mem_start_stk[] should be after + STACK_PUSH_MEM_END() call. +2003/07/29: [new] add regex_get_encoding(), regex_get_options() and + regex_get_syntax(). +2003/07/25: [spec] (thanks akr) + change group(...) to shy-group(?:...) if named group is + used in the pattern. + add REG_SYN_CAPTURE_ONLY_NAMED_GROUP. +2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to + REG_OPTION_DONT_CAPTURE_GROUP. + add REG_OPTION_CAPTURE_GROUP. +2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP. +2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME. +2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME + to REGERR_INVALID_CHAR_IN_GROUP_NAME. +2003/07/17: [spec] restrict usable chars of group name to alphabet, digit, + '_' or multibyte-char in fetch_name(). [ruby-dev:20706] +2003/07/16: [impl] minor change of sample/names.c. +2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP. +2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case. +2003/07/14: [API] add regex_number_of_names(). +2003/07/08: [impl] change error message for undefined group number call. + 'undefined group reference: /(a)\g<2>/' + --> 'undefined group <2> reference: /(a)\g<2>/' +2003/07/08: [dist] modify doc/RE. +2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code. + add IS_DYNAMIC_OPTION() to regint.h. +2003/07/07: [spec] called group should not ignore outside option (?i:...). + ex. /(?i:(?<n>(a)\2)){0}\g<n>/.match("aA") + add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC. + set option status to effect memory in optimize_node_left(). +2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and + OP_ANYCHAR_ML_START_PEEK_NEXT. +2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1. +2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE. 
+ +2003/07/04: Version 1.9.1 + +2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET) +2003/07/04: [spec] check mbuf member in the case of + REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class(). +2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. + should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED. +2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and + REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu) +2003/07/03: [spec] add REG_SYN_OP_ESC_CONTROL_CHAR flag. +2003/07/03: [spec] remove REG_SYN_OP_ESC_OCTAL3 and REG_SYN_OP_ESC_X_HEX2 + flag from RegSyntaxGnuRegex. +2003/07/03: [spec] remove REG_SYN_OP_NON_GREEDY flag from RegSyntaxGnuRegex. +2003/07/02: [dist] fix doc/RE. +2003/07/01: [impl] add config flag USE_VARIABLE_SYNTAX. + (turn off variable syntax on Ruby) +2003/07/01: [spec] add syntax behavior REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND. +2003/06/30: [spec] allow different length top-level alternatives + in look-behind. ex. (?<=abc|abcd), (?<!a|bc) +2003/06/26: [spec] add option REG_OPTION_NEGATE_SINGLELINE. +2003/06/26: [spec] should default on REG_OPTION_SINGLELINE + for REG_SYNTAX_PERL and REG_SYNTAX_JAVA. +2003/06/26: [impl] add options member to RegSyntaxType. +2003/06/26: [spec] don't change the meaning of '\Z' for REG_OPTION_SINGLELINE. +2003/06/25: [dist] don't use option REG_NEWLINE for sample/posix.c. +2003/06/25: [dist] modify testconv.rb. + should match and convert double quoted string data. + ex. x(/\ca/, "\001", 0, 1) +2003/06/25: [impl] add REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL and + REG_SYN_OP2_ESC_M_BAR_META. +2003/06/25: [impl] add REG_SYN_OP_ESC_OCTAL3 and REG_SYN_OP_ESC_X_HEX2. +2003/06/24: [impl] add REG_SYN_OP2_ESC_V_VTAB. (\v is VTAB) +2003/06/24: [bug] should invert REG_OPTION_SINGLELINE flag + in REG_SYN_OP2_OPTION_PERL. +2003/06/24: [impl] add REG_SYN_OP2_OPTION_PERL and REG_SYN_OP2_OPTION_RUBY. + meanings of (?m) and (?s) depend on syntax.
+ +2003/06/20: Version 1.9.0 + +2003/06/20: [spec] \Q...\E is not effective on REG_SYNTAX_RUBY. (thanks akr) +2003/06/19: [inst] rename regex.h to oniguruma.h. +2003/06/18: [impl] change REG_EXTERN setting condition. (__CYGWIN__) +2003/06/18: [bug] return wrong result UTF-8 case in regex_mb2wc(). +2003/06/18: [impl] add REG_SYN_OP2_POSSESSIVE_INTERVAL. a{n,m}+ +2003/06/18: [new] add REG_SYNTAX_JAVA. +2003/06/18: [spec] add REG_SYN_OP_QUOTE. +2003/06/18: [spec] add op2 member to RegSyntaxType. + rename some REG_SYN_OP_XXX to REG_SYN_OP2. +2003/06/16: [new] Perl-like quotation operator \Q, \E. +2003/06/16: [spec] should not control ignore case mode by escaped char. + ex. /\J/i =~ "j", /[\J]/i =~ "j" (same as Perl) +2003/06/13: [bug] modify onigposix.h. +2003/06/13: [bug] should use -DIMPORT for link with DLL in win32/Makefile. +2003/06/13: [dist] add sample/names.c +2003/06/12: [bug] range should be from - 1 in not_wc_range_buf(). +2003/06/12: [spec] should warn for '-' before '&&' operator in char-class. +2003/06/12: [new] add REG_SYNTAX_PERL. +2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. +2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]] +2003/06/11: [new] char-class in char-class (as Java(TM)). +2003/06/11: [spec] change AND operator in char-class from &&[..] to &&. +2003/06/04: [spec] {n,m}+ should not be possessive operator. + ex. a{3}+ should be (?:a{3})+ +2003/06/03: [bug] should compare strings with min-length in is_not_included(). +2003/06/03: [impl] automatic possessivate optimization. a*b ==> (?>a*)b + (thanks Jeffrey E. F. Friedl) +2003/06/02: [impl] remove multibyte-BitSet for OP_CCLASS_MB/OP_CCLASS_MB_NOT. +2003/05/30: [new] char class intersection operator &&[...] like Java(TM). + (thanks akr) +2003/05/30: [bug] should use bbuf_free() for CClassNode in regex_node_free(). +2003/05/29: [bug] wrong usage of syntax REG_SYN_ALLOW_EMPTY_RANGE_IN_CC. + /[d-a]/ should be error. 
+2003/05/28: [impl] optimize stop-backtrack compiled code. + (/(?>a*)/, /(?>\w+)/ etc...) + add OP_POP opcode. +2003/05/28: [new] possessive repeat operator. (?+, *+, ++, {n,m}+) +2003/05/27: [spec] '-' at beginning of char-class should be warn only if + it is start of range. (ex. /[--a]/) +2003/05/27: [spec] should not warn for right bracket at beginning of pattern. + ex. /]aaa/ +2003/05/27: [spec] change CCEND_ESC_WARN() from VERB_WARNING() to WARNING(). +2003/05/27: [spec] /[]aaa/ should be empty char-class error. + /[]aaa]/ should be warn for 'without backslash'. + (add char_exist_check() in regparse.c) +2003/05/26: [bug] OP_REPEAT in recursive subexp call. + ex. /(?<n>(a|b\g<n>c){3,5})/.match("baaaaca") => "baaaaca" + was wrong result. (should be "aaaa") +2003/05/26: [impl] add num_call member to regex_t. +2003/05/26: [impl] add repeat_range member to regex_t. + (for delete upper,lower members from StackType.u.repeat) +2003/05/26: [bug] change print_names() to external regex_print_names(). +2003/05/26: [tune] change OP_NULL_CHECK_END process in match_at(). +2003/05/26: [spec] change CCEND_ESC_WARN() from WARNING() to VERB_WARNING(). +2003/05/26: [spec] remove POSIXLINE option. (?p:...) + (be made the same as Ruby.) +2003/05/22: [spec] use OP_NULL_CHECK_XXX only if repeat is infinite. + prev. /(?:()|()){0,10}\1\2/ =~ "" ==> FAIL + now /(?:()|()){0,10}\1\2/ =~ "" ==> MATCH + +2003/05/22: [impl] change target_empty setting condition in setup_tree(). +2003/05/19: [impl] avoid zero length repeat optimization. (thanks matz) + /()*/ ==> /()?/, /()+/ ==> /()/ etc... +2003/05/19: [impl] minor changes for gcc -Wall. (-DREG_DEBUG_STATISTICS case) +2003/05/19: [spec] rename regex_foreach_names() to regex_foreach_name(). +2003/05/16: [new] add --with-statistics option to configure. +2003/05/16: [bug] move RegOpInfo[] definition to regint.h. +2003/05/16: [new] add regex_version(). + +2003/05/14: Version 1.8.6 + +2003/05/14: [bug] use _vsnprintf() on Win32. 
+2003/05/14: [spec] define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE. + (/\n$/ =~ "\n", /\n\Z/ =~ "\n") [ruby-dev:20125] +2003/05/14: [impl] minor changes for gcc -Wall. +2003/05/14: [impl] add string.h check in AC_CHECK_HEADERS(). +2003/05/13: [impl] minor changes for gcc -Wall. +2003/05/13: [impl] add regex_snprintf_with_pattern(). +2003/05/13: [spec] add warning for char class meta character without escape + in Ruby mode ('[', '-', ']'). +2003/05/13: [impl] define WARNING() and VERB_WARNING() in regint.h. +2003/05/13: [bug] correct is_code_ascii() for /[[:ascii:]]/. +2003/05/12: [dist] add regular expression document (doc/RE). +2003/05/12: [spec] specification of $(END_LINE) was made the same as Ruby 1.8. + [ruby-dev:20130] (thanks matz) +2003/05/12: [memo] shifted to Subversion(version 0.21.0) from CVS. + +2003/03/19: Version 1.8.5 + +2003/03/19: [impl] change REG_EXTERN definition. (thanks nobu) +2003/03/19: [impl] abbreviation for long error_par in regex_error_code_to_str(). +2003/03/18: [dist] change re.c.XXX.patch for GNU regex API changes. +2003/03/18: [spec] change API regex_new(), regex_recompile() and + regex_error_code_to_str(). + change API re_compile_pattern() and re_recompile_pattern(). +2003/03/18: [spec] replace REGERR_END_PATTERN_AT_GROUP_{COMMENT|OPTION} to + REGERR_END_PATTERN_IN_GROUP. +2003/03/17: [impl] should free err_arg. +2003/03/17: [bug] mistake(high -> to) in add_wc_range_to_buf(). +2003/03/17: [spec] add err_arg argument to regex_new() and regex_recompile(). + for detail error message. (thanks akr) + +2003/03/12: Version 1.8.4 + +2003/03/12: [tune] use cached value of effect node in get_min_match_length(). +2003/03/12: [bug] escaped alphabet should be TK_RAW_BYTE + in fetch_token() and fetch_token_in_cc(). +2003/03/12: [spec] change named backref and subexp call format. + backref: \k<name>, call: \g<name> (thanks akr) +2003/03/11: [inst] add regparse.[ch] in win32/Makefile. 
+2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set + then compile error in unset_addr_list_fix(). (thanks knu) +2003/03/10: [impl] divide regcomp.c to regcomp.c, regparse.c and regparse.h. +2003/03/10: [bug] should handle multi-byte code name in fetch_name(). +2003/03/10: [spec] remove REGERR_TABLE_FOR_IGNORE_CASE_IS_NOT_SETTED. +2003/03/10: [spec] support POSIX API option REG_NOSUB. + add comp_options member to POSIX API regex_t. + +2003/03/10: Version 1.8.3 + +2003/03/10: [bug] can not compile with Ruby 1.6.8. + (inconsistent st.h with 1.6 and 1.8) + use hash table on Ruby 1.8 only. +2003/03/10: [spec] forbid to use '\' in group name. +2003/03/08: [impl] remove check_backref_number(). +2003/03/08: [bug] called group in 0-repeat should not be eliminated from + compile code. ex. /(?*n)(?<n>){0}/ (thanks akr) + add is_refered member to QualifierNode. +2003/03/07: [impl] use hash table(st.[ch]) for implementation of name table. + (enable on Ruby in default) +2003/03/07: [new] add regex_foreach_names(). +2003/03/06: [impl] add member reg->stack_pop_level. +2003/03/06: [impl] add operator OP_MEMORY_START and member reg->backtrack_mem. +2003/03/06: [bug] if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY, + should handle backtrack of MEM_END. + add OP_MEMORY_END_PUSH and OP_MEMORY_END_PUSH_REC. +2003/03/06: [impl] rename OP_MEMORY_END_PUSH to OP_MEMORY_END_MARK. +2003/03/06: [spec] change error messages. +2003/03/06: [tune] add tiny_pop check in STACK_POP. + +2003/03/05: Version 1.8.2 + +2003/03/05: [impl] use cache info in EFFECT_MEMORY case + in optimize_node_info(). +2003/03/05: [impl] add EFFECT_MEMORY node reference count check + in optimize_node_left(). +2003/03/05: [impl] add min-len, max-len, char-len cache in EffectNode. +2003/03/05: [spec] allow to call in look behind. ex. /(?<=(?*a))/ +2003/03/05: [bug] forgotten N_ANCHOR case in check_backref_number(), + subexp_inf_recursive_check_trav() etc...
+2003/03/05: [impl] rename USE_ONIGURUMA_EXTENSION to USE_SBMB_CLASS. +2003/03/04: [impl] add CALL-node info in optimize_node_left(). +2003/03/04: [spec] prohibit left recursion of subexp call. ex. (?<n>|(?*n)a) + add subexp_inf_recursive_check_trav(). +2003/03/04: [spec] rename REG_SYN_STRICT_CHECK_BACKREF_NUMBER + to REG_SYN_STRICT_CHECK_BACKREF +2003/03/03: [bug] /(?<n>a(?*n)|)/ isn't infinite recursion. + fix N_LIST case in subexp_recursive_check(). (thanks akr) +2003/03/03: [bug] /(?<n>|(?*n))+/ segmentation fault. + should re-allocate in unset_addr_list_add(). (thanks akr) + +2003/03/01: Version 1.8.1 + +2003/03/01: [bug] change STACK_GET_MEM_START() and STACK_PUSH_MEM_END(). +2003/03/01: [new] add reg_name_to_group_numbers() to POSIX API. +2003/03/01: [impl] use OP_MEMORY_END_PUSH in callable subexp compiled code + only if subexp is recursive. +2003/03/01: [spec] rename regex_name_to_backrefs() to + regex_name_to_group_numbers(). +2003/02/28: [impl] use function stack_double() instead of macro. +2003/02/28: [new] subexp call. (?*name) (thanks akr) +2003/02/28: [spec] add match stack limit check. (MATCH_STACK_LIMIT_SIZE) +2003/02/28: [impl] check recursive subexp call. +2003/02/28: [impl] add opcode OP_MEMORY_END_PUSH for callable subexp. +2003/02/28: [impl] add opcode OP_CALL, OP_RETURN. + add stack type STK_CALL_FRAME, STK_RETURN, STK_MEM_END. +2003/02/26: [spec] add new syntax behavior REG_SYN_STRICT_CHECK_BACKREF_NUMBER. + if it is setted, then error /(\1)/, /\1(..)/ etc... +2003/02/26: [spec] if backref number is greater than max group number, + then return compile error. (REGERR_INVALID_BACKREF_NUMBER) +2003/02/26: [tune] bad implemented N_ALT case in get_min_match_length(). +2003/02/26: [dist] auto update testc.c and win32/testc.c in dist target. +2003/02/26: [impl] add -win option to testconv.rb. +2003/02/25: [spec] allow to assign same name to different group. + add OP_BACKREF_MULTI. +2003/02/24: [impl] reduce redundant repeat of empty target. + ex. 
/()*/ ==> /()?/, /()+/ ==> /()/, /(?:)+/ ==> // +2003/02/24: [impl] change condition in regex_is_allow_reverse_match(). +2003/02/24: [impl] convert i(/../, ...) functions in testconv.rb. +2003/02/24: [impl] change name table struct. + +2003/02/22: Version 1.8.0 + +2003/02/22: [new] named subexp, named back reference. (thanks akr) + define: (?<name>...), back-ref: \g<name> +2003/02/22: [impl] use str_node_can_be_split(). +2003/02/21: [dist] add sample/posix.c +2003/02/21: [spec] rename some error code symbols. +2003/02/21: [spec] max number of multibyte ranges(255) is small. + 255 --> 1000. (thanks MoonWolf) +2003/02/20: [new] supported Basic Regular Expression(BRE) in POSIX API. + (REG_EXTENDED option: Extended RE) +2003/02/20: [new] variable syntax. + +2003/02/12: Version 1.7.2 + +2003/02/12: [bug] mismatch /\?a/i.match('?A'). + check raw value in scan_make_node() and scan_backslash(). + (thanks Nobu) +2003/02/12: [impl] rename 'max_mem' to 'num_mem' in regex_t. +2003/02/12: [impl] rename 'code' to 'enc' in regex_t. +2003/02/12: [spec] remove transtable argument in regex_new and regex_recompile. + remove transtable member in regex_t. +2003/02/10: [inst] change backup file suffix name from '.orig' to '.ruby_orig'. + (win32/Makefile) +2003/02/10: [spec] number check in scan_char_class() ignore-case mode. + ex. /[\x58-\x64]/i +2003/02/10: [impl] don't use OP_MEMORY_END_PUSH (and STK_MEM_END). +2003/02/10: [impl] lift up head_exact value from child qualifier node to parent. +2003/02/10: [tune] change stack type values. +2003/02/10: [dist] add HISTORY. +2003/02/08: [tune] change stack type values. +2003/02/08: [tune] add STACK_BASE_CHECK(). +2003/02/08: [tune] add STACK_PUSH_ENSURED(). +2003/02/08: [dist] change contents of doc/API. +2003/02/07: [inst] change backup file suffix name from '.orig' to '.ruby_orig'. +2003/02/07: [spec] range in char-class should be same spec. with Ruby + in ignore-case mode. (ex. 
/[A-c]/i == /[a-c]/i) + (thanks MoonWolf) +2003/02/07: [spec] [!--] should be allowed. (thanks MoonWolf) +2003/02/07: [dist] refresh re.c.180.patch for re.c (2003-02-06). + +2003/02/07: Version 1.7.1 + +2003/02/07: [impl] check first byte of string in ignore-case mode. + (get_head_exact_node()) +2003/02/07: [impl] remove redundant statements in setup_tree(). +2003/02/06: [new] create Win32 DLL. +2003/02/06: [impl] use P_() macro for function prototype. +2003/02/06: [impl] add HAVE_PROTOTYPE, HAVE_STDARG_PROTOTYPES in + configure.in and config.h.in. +2003/02/06: [spec] /[0-9-a]/ is allowed as usual char '-' and 'a' in Ruby. + add USE_BETTER_COMPATIBILITY_FOR_ORIGINAL_REGEX in + regint.h. (thanks MoonWolf) +2003/02/06: [spec] rename REG_MBCTYPE_XXXX to REG_ENCODING_XXXX in onigposix.h. +2003/02/05: [spec] rename MBCTYPE_XXXX to REG_MBCTYPE_XXXX in onigposix.h. +2003/02/05: [spec] add POSIX API error REG_EONIG_THREAD to onigposix.h. +2003/02/05: [dist] add .cvsignore file. + +2003/02/04: Version 1.7 + +2003/02/04: [bug] typo miss in regex_region_copy(). +2003/02/04: [impl] change THREAD_PASS macro. (regint.h) +2003/02/04: [dist] add API document file doc/API. +2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then + set REG_OPTIMIZE_EXACT_BM in set_optimize_exact_info(). +2003/02/04: [spec] reimplement regex_clone() and it is obsoleted. +2003/02/04: [bug] add REGERR_OVER_THREAD_PASS_LIMIT_COUNT + to regerror.c regposix.c. +2003/02/03: [bug] Hankaku-Kana may be second byte in Shift_JIS + regex_is_allow_reverse_match(). +2003/02/03: [impl] add optimization type REG_OPTIMIZE_EXACT_BM_NOT_REV. + remove exact_allow_reverse_match member in regex_t. +2003/02/03: [impl] add exact_allow_reverse_match member in regex_t. +2003/02/03: [impl] compile-search conflict in regex_search() is handled. +2003/02/01: [tune] decrease regex_region_clear() calling from regex_search(). +2003/02/01: [tune] remove region argument from match_at(). 
+2003/01/31: [tune] don't use strlen() in regexec() and regcomp(). +2003/01/31: [tune] decrease regex_reduce_chain() calling in regex_search(). +2003/01/31: [bug] STRING_CMP() in regexec.c was wrong in ignore-case. +2003/01/31: [impl] convert to lower-case char at string compile time. + change SBTRANSCMP() in regexec.c. +2003/01/31: [impl] rename TTRANS() to TOLOWER(). +2003/01/30: [bug] .c.o --> .c.obj in win32\Makefile. +2003/01/30: [impl] add -DNOT_RUBY to Makefile.in. + NOT_RUBY is referred in regint.h for escape double + including config.h. +2003/01/30: [impl] when string hasn't case ambiguity, don't compile + to ignore case opcode. +2003/01/29: [impl] add SJIS, UTF-8 test_sb() test. +2003/01/29: [dist] add INSTALL-RUBY file. +2003/01/28: [test] success in Cygwin, Ruby 1.8.0 (2003-01-27). +2003/01/24: [inst] add rback target to Makefile.in. +2003/01/24: [impl] change SBCMP() -> IS_NEWLINE() in match_at(). +2003/01/23: [impl] add encoding arg to scan_xxxx_number(). +2003/01/23: [impl] rename WCInt to WCINT. +2003/01/22: [bug] POSIX API regexec() was not thread safe. + remove region member from POSIX regex_t. + [new] add search time option REG_OPTION_POSIX_REGION. + (region argument is treated as regmatch_t[] type) + speed up regexec(). +2003/01/22: [memo] start CVS entry in my box. + +2003/01/21: Version 1.6 + +2003/01/21: [test] Mac OS X 10.1, Ruby 1.8.0 (2003-01-20) +2003/01/20: [impl] add UTF-8 check to test.rb. (thanks UENO Katsuhiro) +2003/01/18: [impl] change REGION_NOTPOS to REG_REGION_NOTPOS in regex.h. +2003/01/17: [dist] add sample/simple.c. +2003/01/17: [inst] add configure option --with-rubydir. +2003/01/17: [bug] badly implemented POSIX API options. + default: /./ not match "\n", anchor not match "\n" + REG_NEWLINE: /./ not match "\n", anchor match "\n" +2003/01/16: [impl] rewrite POSIX API regexec() for speed up. +2003/01/16: [impl] add region member to POSIX regex_t struct. +2003/01/16: [inst] rename library file from 'libregex.a' to 'libonig.a'.
+2003/01/15: [dist] add testc.c to distribution file. +2003/01/15: [test] success in 'make rtest/ctest/ptest' on Windows 2000. +2003/01/15: [bug] change '/' to \' in win32/Makefile. +2003/01/14: [test] success in Ruby make test on Windows 2000. + VC++6.0, Ruby 1.6.8 (2003-01-12) +2003/01/14: [inst] change Makefile.in and win32/Makefile. +2003/01/11: [inst] changes for Win32 platform. (regint.h, reggnu.c, regcomp.c) +2003/01/11: [dist] add win32 directory. (config.h, Makefile, testc.c) +2003/01/10: [inst] add onigposix.h to install target. (Makefile.in) +2003/01/10: [bug] lacked a comma in ESTRING[]. (regposerr.c) +2003/01/10: [bug] local variable name was wrong. buf -> tbuf (regerror()) +2003/01/10: [spec] remove REG_RUBY_M17N case from onigposix.h and regposix.c. + +2003/01/09: Version 1.5 + +2003/01/09: [inst] replace Ruby re.c.XXX.patch files. (166 -> 168, 172 -> 180) +2003/01/09: [new] implement POSIX API. (thanks knu) + (onigposix.h, regposix.c, regposerr.c) +2003/01/08: [spec] remove REGERR_END_PATTERN_AFTER_BACKSLASH in regex.h. +2003/01/08: [spec] region arg can be NULL in regex_search() and regex_match(). + +2003/01/08: Version 1.4 + +2003/01/08: [inst] add test program converter (test.rb -> testc.c). +2003/01/08: [bug] move GET_WCINT() from regcomp.c to regint.h. +2003/01/07: [inst] add new test script (test.rb). +2002/12/30: [bug] wrong merge in multibyte mode (alt_merge_opt_exact_info()). +2002/12/28: [inst] add rtest target to Makefile.in. +2002/12/28: [bug] /\xfe/.match("\xfe") mismatch in multibyte mode. + add "raw" flag arg to concat_opt_exact_info_str(). +2002/12/25: [bug] check condition was wrong in alt_merge_opt_map_info(). +2002/12/25: [impl] add threshold_len check in regex_search(). +2002/12/23: [bug] prec-read in alternative (/a|(?=z).f/.match("zf") => nil) +2002/12/23: [bug] \G in alternative (/a|\Gz/.match("bza") => "z"). + add start member in MatchArg. (regexec.c) +2002/12/21: [impl] **** rewrite all optimization process. 
**** +2002/12/16: [impl] remove node subtype EFFECT_EMPTY. +2002/12/12: [impl] reconstruct node types. (regcomp.c) +2002/12/11: [impl] add regerror.c +2002/12/10: [bug] [ruby-dev:19042] (thanks Nobu) + anchor(\G etc...) influenced outside of "|". (/a|\Gb/) +2002/11/30: [bug] [ruby-dev:18966] (thanks Nobu) + char-class(\S, [^\s] etc...) optimize map-info was wrong. +2002/11/29: [bug] infinite loop on NULL-pointer str search (regex_search()). + (thanks matz) +2002/11/29: [bug] change static -> extern (regex_chain_reduce()). +2002/11/29: [bug] change encoding to RegDefaultCharEncoding + in re_recompile_pattern(). (adapt to re.c) +2002/04/24: [spec] USE_ONIGURUMA_EXTENSION is disabled in default. +2002/04/24: [new] add searching time option: REG_OPTION_NOTBOL/NOTEOL. + add searching time option argument to regex_search() and + regex_match(). (prepare for POSIX API) +2002/04/20: [impl] divide regex.c file into regcomp.c, regexec.c, reggnu.c + and regint.h. +2002/04/09: [impl] move IS_MULTILINE() to outside of loop in OP_ANYCHAR_STAR. +2002/04/08: [impl] don't use OP_REPEAT operator for '??'. +2002/04/06: [impl] reduce redundant nested repeat operators(?,*,+,??,*?,+?). + ex. (?:a*)?, (?:a??)* etc.. +2002/04/06: [spec] should not warn for /(?:a?)+?/. +2002/04/04: [spec] should allow fixed length alternative and repeat pattern + in look-behind. ex. /(?<=(a|b){3})/ (thanks Guy Decoux) +2002/04/02: [spec] should warn for /(?:a+)?/ and /(?:a*)??/. (thanks akr) + +2002/04/01: Version 1.3 + +2002/04/01: [dist] add COPYING. +2002/03/30: [spec] warn redundant nested repeat operator + in Ruby verbose mode. ex. (?:a*)? +2002/03/30: [spec] nested repeat operator error check should be + same with GNU regex. (thanks Guy Decoux) +2002/03/30: [new] add \x{hexadecimal-wide-char}. (thanks matz) +2002/03/27: [bug] MBCTYPE_XXX symbol values should be same with GNU regex. +2002/03/27: [impl] add THREAD_ATOMIC to regex_clone(), regex_init(), regex_end(). 
+2002/03/25: [spec] if encoding is utf-8, allow combination of singlebyte and + multibyte code range in char class. + (cancelled 2002/04/01: for M17N compatibility) +2002/03/25: [dist] description of the license condition is added to README. +2002/03/23: [bug] should set all bits of reg->mem_stats, + if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY. +2002/03/23: [new] add a new option REG_OPTION_NOT_EMPTY. +2002/03/20: [spec] allow incomplete left brace as a usual char. + ex. /{/, /({)/, /a{2,3/ etc... +2002/03/20: [impl] serialize integer in bytecode. + (switch by UNALIGNED_WORD_ACCESS in regex.c) +2002/03/20: [impl] change re_mbcinit() for REG_RUBY_M17N. +2002/03/19: [impl] word alignment of char class multi-byte code ranges. +2002/03/19: [impl] replace OP_EXACTMB4N with OP_EXACTMB3N. +2002/03/19: [bug] OP_CCLASS_MB_NOT process in matchAt() is wrong. +2002/03/19: [new] add re_mbctab[] for Ruby extension library compatibility. +2002/03/19: [spec] allow nested repeat operator, if operator is {n,m} type. +2002/03/19: [new] add REG_IS_PATTERN_ERROR(ecode) in regex.h +2002/03/18: [spec] /[a-b-c]/ should be error. +2002/03/18: [bug] /[\w-a]/ should be error. (thanks Guy Decoux) +2002/03/18: [bug] /[\]/ should be error. (thanks Guy Decoux) +2002/03/18: [bug] /()*/ etc.. should not be error. (thanks Guy Decoux) +2002/03/18: [spec] /a{1}*/ should not be error. (thanks Guy Decoux) +2002/03/18: [bug] ab{2}{3} was interpreted as (?:a(?:b{2})){3} + (thanks Guy Decoux) +2002/03/18: [bug] abort /(?i)*a/ etc... (thanks Guy Decoux) +2002/03/18: [bug] abort /a|*/,/a|{1}/ etc... (thanks Guy Decoux) + +2002/03/13: Version 1.2 + +2002/03/13: [test] success in rubicon/builtin/AllBuiltinTests.rb. + (thanks rubicon) +2002/03/13: [bug] OP_EXACTMBN process in matchAt() is wrong. +2002/03/13: [bug] start argument of BackwardSearchRange() is wrong. +2002/03/12: [spec] change function name style from CamelCase + to underline_separation.
(includes API) +2002/03/12: [bug] if pattern has nested null-check, cause infinite loop. + correct STACK_NULL_CHECK() macro. (thanks Guy Decoux) +2002/03/11: [bug] it is wrong that four numbers to continue as + an octal value in scanBackSlash(). ex. /\0111/ + (thanks matz) +2002/03/11: [new] \k (single-byte word char), \K (multi-byte char). +2002/03/09: [inst] add two targets to Makefile.in (166 and 172). +2002/03/09: [spec] decrease REG_MAX_BACKREF_NUM, REG_MAX_REPEAT_NUM + values. +2002/03/08: [spec] allow use of "\A"(begin-buf) in look-behind. +2002/03/08: [impl] add a new opcode OP_PUSH_IF_PEEK_NEXT. +2002/03/08: [impl] add a new opcode OP_ANYCHAR_STAR_PEEK_NEXT. +2002/03/07: [spec] prohibit use of capture group "(...)" + in negative look-behind. +2002/03/07: [inst] add configure.in, config.h.in, Makefile.in. +2002/03/07: [impl] call Init_REGEX_STAT() in RegexInit(). +2002/03/07: [spec] less length string match with negative look-behind. + ex. /(?<!XXX)a/.match("Xa"). (thanks Nobu) +2002/03/06: [impl] expand repeated string, if expanded length <= 100. + ex. /(?:abc){10}/ +2002/03/06: [new] add a symbol REG_TRANSTABLE_USE_DEFAULT in regex.h. +2002/03/06: [impl] rename RegDefaultCharCode to RegDefaultCharEncoding. +2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens + in ScanMakeNode(). (beware of strchr(). thanks Nobu) +2002/03/06: [bug] range argument of ForwardSearchRange() is wrong. + ex. /\A.a/, /\G.a/ mismatched with "aa". (thanks Nobu) +2002/03/05: [new] add RegexMatch() API. rename regexMatch() to matchAt(). +2002/03/05: [impl] change function definition style. +2002/03/05: [impl] abolish use of macro symbol which name begin with underline. +2002/03/04: [bug] make up a break-statement in compileTree(). + (compile error on Mac OS X 10.1.3) + +2002/03/04: Version 1.1 + +2002/03/04: [impl] replace STK_BOTTOM with STK_ALT. +2002/03/02: [impl] add new opcode OP_FINISH and new stack type + STK_BOTTOM for (little bit) speed up STACK_POP. 
+2002/03/02: [impl] add new opcode OP_EXACT1_IC, OP_EXACTN_IC + for compile time ignore case check. + remove opcode OP_EXACT1_RAW, OP_EXACTN_RAW. +2002/03/02: [impl] add OpTime info to statistical data. +2002/02/28: [bug] sub_anchor($) in ForwardSearch() and BackwardSearch(). + ex. /$\x0az/.match("\nz") +2002/02/28: [new] look-behind (?<=pattern), (?<!pattern). +2002/02/27: [bug] use StackIndex instead of StackType* for realloc problem. +2002/02/27: [impl] use m17n_codepoint() as mb2wc() in REG_RUBY_M17N. +2002/02/27: [spec] undefined POSIX bracket /[[:xyz:]]/ should be syntax error. +2002/02/26: [bug] ex. /$*/, /[a-]/, /((?i)a)b/ (thanks matz) + +2002/02/25: Version 1.0 (first release) + +-- +[bug: bug fix] +[API: API change/new/delete] +[new: new feature] +[spec: specification change] +[impl: implementation change] +[tune: tune for speed up] +[inst: changes for installation] +[dist: distribution change] +[test: test] +[memo: memo] +-- +<CVS: show all tags> +cvs history -T + +<CVS: add tag> +cvs rtag "VERSION_X_X_X" oniguruma + + +<GNU Autotools: bootstrap> +* write Makefile.am and configure.in. +> aclocal +> libtoolize +> automake --foreign --add-missing +> autoconf +> configure --with-rubydir=... CFLAGS="-O2 -Wall" + + +<GNU libtool: version management> + + VERSION = current:revision:age + + current: interface number (from 0) + revision: implementation number of same interface (from 0) + age: number of supported previous interfaces + (if current only supported then age == 0) + +//END diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README new file mode 100644 index 0000000..dff7fba --- /dev/null +++ b/ext/mbstring/oniguruma/README @@ -0,0 +1,189 @@ +README 2007/06/18 + +Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + +http://www.geocities.jp/kosako3/oniguruma/ +http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ + +Oniguruma is a regular expressions library. 
+The characteristic of this library is that a different character encoding +can be specified for every regular expression object. + +Supported character encodings: + + ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, + EUC-JP, EUC-TW, EUC-KR, EUC-CN, + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, + ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, + ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, + ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 + +* GB 18030: contributed by KUBO Takehiro +* KOI8 is not included in library archive by default setup. + (need to edit Makefile if you want to use it.) +------------------------------------------------------------ + +Install + + Case 1: Unix and Cygwin platform + + 1. ./configure + 2. make + 3. make install + + * uninstall + + make uninstall + + * test (ASCII/EUC-JP) + + make atest + + * configuration check + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix + + + + Case 2: Win32 platform (VC++) + + 1. copy win32\Makefile Makefile + 2. copy win32\config.h config.h + 3. nmake + + onig_s.lib: static link library + onig.dll: dynamic link library + + * test (ASCII/Shift_JIS) + 4. copy win32\testc.c testc.c + 5. nmake ctest + + + +License + + When this software is partly used or it is distributed with Ruby, + this of Ruby follows the license of Ruby. + It follows the BSD license in the case of the one except for it. + + + +Regular Expressions + + See doc/RE (or doc/RE.ja for Japanese). + + +Usage + + Include oniguruma.h in your program. (Oniguruma API) + See doc/API for Oniguruma API. + + If you want to disable UChar type (== unsigned char) definition + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + include oniguruma.h. + + If you want to disable regex_t type definition in oniguruma.h, + define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
+ + Example of the compiling/linking command line in Unix or Cygwin, + (prefix == /usr/local case) + + cc sample.c -L/usr/local/lib -lonig + + + If you want to use static link library(onig_s.lib) in Win32, + add option -DONIG_EXTERN=extern to C compiler. + + + +Sample Programs + + sample/simple.c example of the minimum (Oniguruma API) + sample/names.c example of the named group callback. + sample/encode.c example of some encodings. + sample/listcap.c example of the capture history. + sample/posix.c POSIX API sample. + sample/sql.c example of the variable meta characters. + (SQL-like pattern matching) + sample/syntax.c Perl, Java and ASIS syntax test. + + +Source Files + + oniguruma.h Oniguruma API header file. (public) + onig-config.in configuration check program template. + + regenc.h character encodings framework header file. + regint.h internal definitions + regparse.h internal definitions for regparse.c and regcomp.c + regcomp.c compiling and optimization functions + regenc.c character encodings framework. + regerror.c error message function + regext.c extended API functions. (deluxe version API) + regexec.c search and match functions + regparse.c parsing functions. + regsyntax.c pattern syntax functions and built-in syntax definitions. + regtrav.c capture history tree data traverse functions. + regversion.c version info function. + st.h hash table functions header file + st.c hash table functions + + oniggnu.h GNU regex API header file. (public) + reggnu.c GNU regex API functions + + onigposix.h POSIX API header file. (public) + regposerr.c POSIX error message function. + regposix.c POSIX API functions. + + enc/mktable.c character type table generator. + enc/ascii.c ASCII encoding. + enc/euc_jp.c EUC-JP encoding. + enc/euc_tw.c EUC-TW encoding. + enc/euc_kr.c EUC-KR, EUC-CN encoding. + enc/sjis.c Shift_JIS encoding. + enc/big5.c Big5 encoding. + enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro) + enc/koi8.c KOI8 encoding. 
+ enc/koi8_r.c KOI8-R encoding. + enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) + enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) + enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) + enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) + enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) + enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) + enc/iso8859_7.c ISO-8859-7 encoding. (Greek) + enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) + enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) + enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) + enc/iso8859_11.c ISO-8859-11 encoding. (Thai) + enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim) + enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) + enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) + enc/iso8859_16.c ISO-8859-16 encoding. + (Latin-10 or South-Eastern European with Euro) + enc/utf8.c UTF-8 encoding. + enc/utf16_be.c UTF-16BE encoding. + enc/utf16_le.c UTF-16LE encoding. + enc/utf32_be.c UTF-32BE encoding. + enc/utf32_le.c UTF-32LE encoding. + enc/unicode.c Unicode information data. + + win32/Makefile Makefile for Win32 (VC++) + win32/config.h config.h for Win32 + + + +API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6 + + + re_compile_fastmap() is removed. + + re_alloc_pattern() is added. + + + +I'm thankful to Akinori MUSHA. 
+ + +Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja new file mode 100644 index 0000000..2dee793 --- /dev/null +++ b/ext/mbstring/oniguruma/README.ja @@ -0,0 +1,192 @@ +README.ja 2007/06/18 + +µ´¼Ö ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + +http://www.geocities.jp/kosako3/oniguruma/ +http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ + +µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£ +¤³¤Î¥é¥¤¥Ö¥é¥ê¤ÎÆÃĹ¤Ï¡¢¤½¤ì¤¾¤ì¤ÎÀµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤´¤È¤Ë +ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò»ØÄê¤Ç¤¤ë¤³¤È¤Ç¤¢¤ë¡£ + +¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°: + + ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, + EUC-JP, EUC-TW, EUC-KR, EUC-CN, + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, + ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, + ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, + ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 + +* GB 18030: µ×ÊÝ·òÍλáÄó¶¡ +* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£ + (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È) +------------------------------------------------------------ + +¥¤¥ó¥¹¥È¡¼¥ë + + ¥±¡¼¥¹£±: Unix¤ÈCygwin´Ä¶ + + 1. ./configure + 2. make + 3. make install + + ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë + + make uninstall + + ưºî¥Æ¥¹¥È (ASCII/EUC-JP) + + make atest + + + ¹½À®³Îǧ + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix + + + + ¥±¡¼¥¹£²: Win32(VC++)´Ä¶ + + 1. copy win32\Makefile Makefile + 2. copy win32\config.h config.h + 3. nmake + + onig_s.lib: static link library + onig.dll: dynamic link library + + * ưºî¥Æ¥¹¥È (ASCII/Shift_JIS) + 4. copy win32\testc.c testc.c + 5. 
nmake ctest + + +¥é¥¤¥»¥ó¥¹ + + ¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤¬Ruby¤È°ì½ï¤Ë»ÈÍѤޤ¿¤ÏÇÛÉÛ¤µ¤ì¤ë¾ì¹ç¤Ë¤Ï¡¢ + Ruby¤Î¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£ + ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¡¢BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£ + + +Àµµ¬É½¸½ + + doc/RE.ja¤ò»²¾È + + +»ÈÍÑÊýË¡ + + »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£ + Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç + ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬ + ͸ú¤Ë¤Ê¤ë¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢ + ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦ + ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í¸ú¤Ë¤Ê¤ë¡£ + + Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§ + (prefix¤¬/usr/local¤Î¤È¤) + cc sample.c -L/usr/local/lib -lonig + + GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ + ¤¤¤ì¤Ð¡¢»ÈÍѤǤ¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£ + ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î + ´Ä¶ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£ + + + Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢ + ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£ + + +»ÈÍÑÎã¥×¥í¥°¥é¥à + + sample/simple.c ºÇ¾®Îã (Oniguruma API) + sample/names.c ̾Á°ÉÕ¤¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã + sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã + sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã + sample/posix.c POSIX API»ÈÍÑÎã + sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó) + sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È + + +¥½¡¼¥¹¥Õ¥¡¥¤¥ë + + oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«) + onig-config.in onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È + + regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ + regint.h ÆâÉôÀë¸À + regparse.h regparse.c¤Èregcomp.c¤Î¤¿¤á¤ÎÆâÉôÀë¸À + regcomp.c ¥³¥ó¥Ñ¥¤¥ë¡¢ºÇŬ²½´Ø¿ô + regenc.c ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁÈ¤ß + regerror.c ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô + regext.c 
³ÈÄ¥API´Ø¿ô + regexec.c ¸¡º÷¡¢¾È¹ç´Ø¿ô + regparse.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥ó²òÀÏ´Ø¿ô + regsyntax.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ´Ø¿ô¡¢Áȹþ¤ßʸˡÄêµÁ + regtrav.c Êá³ÍÍúÎòÌÚ½ä²ó´Ø¿ô + regversion.c ÈǾðÊó´Ø¿ô + st.h ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ôÀë¸À + st.c ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ô + + oniggnu.h GNU regex API¥Ø¥Ã¥À (¸ø³«) + reggnu.c GNU regex API´Ø¿ô + + onigposix.h POSIX API¥Ø¥Ã¥À (¸ø³«) + regposerr.c POSIX API¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô + regposix.c POSIX API´Ø¿ô + + enc/mktable.c ʸ»ú¥¿¥¤¥×¥Æ¡¼¥Ö¥ëÀ¸À®¥×¥í¥°¥é¥à + enc/ascii.c ASCII ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/euc_jp.c EUC-JP ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/euc_tw.c EUC-TW ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡) + enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/iso8859_1.c ISO-8859-1 (Latin-1) + enc/iso8859_2.c ISO-8859-2 (Latin-2) + enc/iso8859_3.c ISO-8859-3 (Latin-3) + enc/iso8859_4.c ISO-8859-4 (Latin-4) + enc/iso8859_5.c ISO-8859-5 (Cyrillic) + enc/iso8859_6.c ISO-8859-6 (Arabic) + enc/iso8859_7.c ISO-8859-7 (Greek) + enc/iso8859_8.c ISO-8859-8 (Hebrew) + enc/iso8859_9.c ISO-8859-9 (Latin-5 ¤Þ¤¿¤Ï Turkish) + enc/iso8859_10.c ISO-8859-10 (Latin-6 ¤Þ¤¿¤Ï Nordic) + enc/iso8859_11.c ISO-8859-11 (Thai) + enc/iso8859_13.c ISO-8859-13 (Latin-7 ¤Þ¤¿¤Ï Baltic Rim) + enc/iso8859_14.c ISO-8859-14 (Latin-8 ¤Þ¤¿¤Ï Celtic) + enc/iso8859_15.c ISO-8859-15 (Latin-9 ¤Þ¤¿¤Ï West European with Euro) + enc/iso8859_16.c ISO-8859-16 + (Latin-10 ¤Þ¤¿¤Ï South-Eastern European with Euro) + enc/utf8.c UTF-8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/utf16_be.c UTF-16BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/utf16_le.c UTF-16LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/utf32_be.c UTF-32BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/utf32_le.c UTF-32LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/unicode.c Unicode¾ðÊó + + win32/Makefile Win32ÍÑ Makefile (for VC++) + win32/config.h Win32ÍÑ config.h + + + +Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ + + + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£ + + re_alloc_pattern() 
¤¬Äɲ䵤줿¡£ + + +I'm thankful to Akinori MUSHA. + + +¥¢¥É¥ì¥¹: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in new file mode 100644 index 0000000..4a2fc28 --- /dev/null +++ b/ext/mbstring/oniguruma/config.h.in @@ -0,0 +1,108 @@ +/* config.h.in. Generated from configure.in by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define if compiler supports prototypes */ +#undef HAVE_PROTOTYPES + +/* Define if compiler supports stdarg prototypes */ +#undef HAVE_STDARG_PROTOTYPES + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/times.h> header file. */ +#undef HAVE_SYS_TIMES_H + +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file.
*/ +#undef HAVE_UNISTD_H + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of a `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of a `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of a `short', as computed by sizeof. */ +#undef SIZEOF_SHORT + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ +#undef TIME_WITH_SYS_TIME + +/* Define if combination explosion check */ +#undef USE_COMBINATION_EXPLOSION_CHECK + +/* Version number of package */ +#undef VERSION + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API new file mode 100644 index 0000000..2f66287 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/API @@ -0,0 +1,585 @@ +Oniguruma API Version 4.7.1 2007/07/04 + +#include <oniguruma.h> + + +# int onig_init(void) + + Initialize library. + + You don't have to call it explicitly, because it is called in onig_new(). + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + Get error message string. 
+ If this function is used for onig_new(), + don't call this after the pattern argument of onig_new() is freed. + + normal return: error message string length + + arguments + 1 err_buf: error message string buffer. + (required size: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: error code returned by other API functions. + 3 err_info (optional): error info returned by onig_new(). + + +# void onig_set_warn_func(OnigWarnFunc func) + + Set warning function. + + WARNING: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. + + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + Set verbose warning function. + + WARNING: + redundant nested repeat operator. + + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Create a regex object. + + normal return: ONIG_NORMAL + + arguments + 1 reg: return regex object's address. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. (pattern + pattern length) + 4 option: compile time options. + + ONIG_OPTION_NONE no option + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' + ONIG_OPTION_MULTILINE '.' match with newline + ONIG_OPTION_IGNORECASE ambiguity match on + ONIG_OPTION_EXTEND extended pattern form + ONIG_OPTION_FIND_LONGEST find longest match + ONIG_OPTION_FIND_NOT_EMPTY ignore empty match + ONIG_OPTION_NEGATE_SINGLELINE + clear ONIG_OPTION_SINGLELINE which is enabled on + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA + + ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured. + ONIG_OPTION_CAPTURE_GROUP named and no-named group captured. + + 5 enc: character encoding. 
+ + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP + ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8 KOI8 + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB 18030 + + or any OnigEncodingType data address defined by the user. + + 6 syntax: address of pattern syntax definition. + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + named group + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_DEFAULT default (== Ruby) + onig_set_default_syntax() + + or any OnigSyntaxType data address defined by the user. + + 7 err_info: address for returning optional error info. + Use this value as the 3rd argument of onig_error_code_to_str(). + + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + Create a regex object. + This function is a deluxe version of onig_new().
+ + normal return: ONIG_NORMAL + + arguments + 1 reg: return address of regex object. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. (pattern + pattern length) + 4 ci: compile time info. + + ci->num_of_elements: number of elements in ci. (current version: 5) + ci->pattern_enc: pattern string character encoding. + ci->target_enc: target string character encoding. + ci->syntax: address of pattern syntax definition. + ci->option: compile time option. + ci->ambig_flag: character matching ambiguity bit flag for + ONIG_OPTION_IGNORECASE mode. + + ONIGENC_AMBIGUOUS_MATCH_NONE: exact + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII + ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on + ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) + onig_set_default_ambig_flag() + + 5 einfo: address for returning optional error info. + Use this value as the 3rd argument of onig_error_code_to_str(). + + + Different character encoding combinations are allowed for + the following cases only. + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + Free memory used by regex object. + + arguments + 1 reg: regex object. + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + Search string and return search result and matching region. + + normal return: match position offset (i.e.
p - str >= 0) + not found: ONIG_MISMATCH (< 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 start: search start address of target string + 5 range: search terminate address of target string + in forward search (start <= searched string head < range) + in backward search (range <= searched string head <= start) + 6 region: address for return group match range info (NULL is allowed) + 7 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) + + Match string and return result and matching region. + + normal return: match length (>= 0) + not match: ONIG_MISMATCH ( < 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 at: match address of target string + 5 region: address for return group match range info (NULL is allowed) + 6 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. + + +# OnigRegion* onig_region_new(void) + + Create a region. + + +# void onig_region_free(OnigRegion* region, int free_self) + + Free memory used by region. + + arguments + 1 region: target region + 2 free_self: [1: free all, 0: free memory used in region but not self] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + Copy contents of region. + + arguments + 1 to: target region + 2 from: source region + + +# void onig_region_clear(OnigRegion* region) + + Clear contents of region. 
+ + arguments + 1 region: target region + + +# int onig_region_resize(OnigRegion* region, int n) + + Resize group range area of region. + + normal return: ONIG_NORMAL + + arguments + 1 region: target region + 2 n: new size + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + Return the group number list of the name. + Named subexp is defined by (?<name>....). + + normal return: number of groups for the name. + (ex. /(?<x>..)(?<x>..)/ ==> 2) + name not found: -1 + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 num_list: returned list of group numbers. + + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + Return the group number corresponding to the named backref (\k<name>). + If two or more regions for the groups of the name are effective, + the greatest number among them is returned. + + normal return: group number. + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 region: search/match result region. + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) + + Iterate function call for all names. + + normal return: 0 + error: func's return value. + + arguments + 1 reg: regex object. + 2 func: callback function. + func(name, name_end, <number of groups>, <group number's list>, + reg, arg); + if func does not return 0, then iteration is stopped. + 3 arg: argument for func. + + +# int onig_number_of_names(regex_t* reg) + + Return the number of names defined in the pattern. + Multiple definitions of one name are counted as one. + + arguments + 1 reg: regex object.
+ + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigAmbigType onig_get_ambig_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + Return a value of the regex object. + + arguments + 1 reg: regex object. + + +# int onig_number_of_captures(regex_t* reg) + + Return the number of capture groups in the pattern. + + arguments + 1 reg: regex object. + + +# int onig_number_of_capture_histories(regex_t* reg) + + Return the number of capture histories defined in the pattern. + + You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY + is disabled in the pattern syntax. (It is disabled in the default syntax.) + + arguments + 1 reg: regex object. + + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + Return the root node of capture history data tree. + + This value is undefined if matching has failed. + + arguments + 1 region: matching result. + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + Traverse and callback in capture history data tree. + + normal return: 0 + error: callback func's return value. + + arguments + 1 region: match region data. + 2 at: callback position. + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children. + ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback. + ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children, + and at last callback again. + + 3 func: callback function. + if func does not return 0, then traverse is stopped. + + int func(int group, int beg, int end, int level, int at, + void* arg) + + group: group number + beg: capture start position + end: capture end position + level: nest level (from 0) + at: callback position + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: optional callback argument + + 4 arg: optional callback argument.
+ + +# int onig_noname_group_capture_is_active(regex_t* reg) + + Return whether capturing by unnamed groups is active. + + active: 1 + inactive: 0 + + arguments + 1 reg: regex object. + + if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON + --> inactive + + if the regex pattern has a named group + and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON + and option ONIG_OPTION_CAPTURE_GROUP == OFF + --> inactive + + else --> active + + +# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + Return previous character head address. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return left-adjusted head address of a character. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return right-adjusted head address of a character. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + Return number of characters in the string. + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + Return number of bytes in the string. + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + Set default syntax. + + arguments + 1 syntax: address of pattern syntax definition. + + +# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + Copy syntax. + + arguments + 1 to: destination address. + 2 from: source address.
+ + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + Get/Set elements of the syntax. + + arguments + 1 syntax: syntax + 2 op, op2, behavior, options: value of element. + + +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) + + Copy encoding. + + arguments + 1 to: destination address. + 2 from: source address. + + +# int onig_set_meta_char(OnigEncoding enc, unsigned int what, + OnigCodePoint code) + + Set a variable meta character to the code point value. + Except for the escape character, this meta character specification + does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled + in the syntax. (It is not enabled in the built-in syntaxes.) + + normal return: ONIG_NORMAL + + arguments + 1 enc: target encoding + 2 what: specifies which meta character it is. + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR. + + +# OnigAmbigType onig_get_default_ambig_flag() + + Get default ambig flag. + + +# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) + + Set default ambig flag. + + 1 ambig_flag: ambiguity flag + + +# unsigned int onig_get_match_stack_limit_size(void) + + Return the maximum match stack size. + (default: 0 == unlimited) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + Set the maximum match stack size.
+ (size = 0: unlimited) + + normal return: ONIG_NORMAL + + +# int onig_end(void) + + The use of this library is finished. + + normal return: ONIG_NORMAL + + It is not allowed to use regex objects which created + before onig_end() call. + + +# const char* onig_version(void) + + Return version string. (ex. "2.2.8") + +// END diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja new file mode 100644 index 0000000..f2a8bd6 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/API.ja @@ -0,0 +1,592 @@ +µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 4.7.1 2007/07/04 + +#include <oniguruma.h> + + +# int onig_init(void) + + ¥é¥¤¥Ö¥é¥ê¤Î½é´ü²½ + + onig_new()¤ÎÃæ¤Ç¸Æ¤Ó½Ð¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤Î´Ø¿ô¤òÌÀ¼¨Åª¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤¯¤Æ¤â¤è¤¤¡£ + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò¼èÆÀ¤¹¤ë¡£ + + ¤³¤Î´Ø¿ô¤ò¡¢onig_new()¤Î·ë²Ì¤ËÂФ·¤Æ¸Æ¤Ó½Ð¤¹¾ì¹ç¤Ë¤Ï¡¢onig_new()¤Îpattern°ú¿ô¤ò + ¥á¥â¥ê²òÊü¤¹¤ë¤è¤ê¤âÁ°¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸Ê¸»úÎó¤Î¥Ð¥¤¥ÈĹ + + °ú¿ô + 1 err_buf: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò³ÊǼ¤¹¤ëÎΰè + (ɬÍפʥµ¥¤¥º: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: ¥¨¥é¡¼¥³¡¼¥É + 3 err_info (optional): onig_new()¤Îerr_info + + +# void onig_set_warn_func(OnigWarnFunc func) + + ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£ + + ·Ù¹ð: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. + + °ú¿ô + 1 func: ·Ù¹ð´Ø¿ô void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + ¾ÜºÙ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£ + + ¾ÜºÙ·Ù¹ð: + redundant nested repeat operator. 
+ + °ú¿ô + 1 func: ¾ÜºÙ·Ù¹ð´Ø¿ô void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó + 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length) + 4 option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NONE ¥ª¥×¥·¥ç¥ó¤Ê¤· + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' + ONIG_OPTION_MULTILINE '.'¤¬²þ¹Ô¤Ë¥Þ¥Ã¥Á¤¹¤ë + ONIG_OPTION_IGNORECASE Û£Ëæ¥Þ¥Ã¥Á ¥ª¥ó + ONIG_OPTION_EXTEND ¥Ñ¥¿¡¼¥ó³ÈÄ¥·Á¼° + ONIG_OPTION_FIND_LONGEST ºÇĹ¥Þ¥Ã¥Á + ONIG_OPTION_FIND_NOT_EMPTY ¶õ¥Þ¥Ã¥Á¤ò̵»ë + ONIG_OPTION_NEGATE_SINGLELINE + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA¤Ç + ¥Ç¥Õ¥©¥ë¥È¤Ç͸ú¤ÊONIG_OPTION_SINGLELINE¤ò¥¯¥ê¥¢¤¹¤ë¡£ + + ONIG_OPTION_DONT_CAPTURE_GROUP ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤Î¤ßÊá³Í + ONIG_OPTION_CAPTURE_GROUP ̾Á°Ìµ¤·Êá³Í¼°½¸¹ç¤âÊá³Í + + 5 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP + 
ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8 KOI8 + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB 18030 + + ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹ + + 6 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_DEFAULT default (== Ruby) + onig_set_default_syntax() + + ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigSyntaxType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹ + + 7 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹ + onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£ + ¤³¤Î´Ø¿ô¤Ï¡¢onig_new()¤Î¥Ç¥é¥Ã¥¯¥¹ÈÇ¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó + 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length) + 4 ci: ¥³¥ó¥Ñ¥¤¥ë¾ðÊó + + ci->num_of_elements: ci¤ÎÍ×ÁÇ¿ô (¸½ºß¤ÎÈǤǤÏ: 5) + ci->pattern_enc: ¥Ñ¥¿¡¼¥óʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + ci->target_enc: ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + ci->syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ + ci->option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó + ci->ambig_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î + ʸ»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥° + + ONIGENC_AMBIGUOUS_MATCH_NONE: Û£ËæÌµ¤· + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCII¤ÎÂçʸ»ú¾®Ê¸»ú + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú + ONIGENC_AMBIGUOUS_MATCH_FULL: Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í¸ú + ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) + onig_set_default_ambig_flag() + + 5 err_info: 
¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹ + onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë + + + °Û¤Ê¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ÎÁȤ߹ç¤ï¤»¤Ï¡¢°Ê²¼¤Î¾ì¹ç¤Ë¤Î¤ßµö¤µ¤ì¤ë¡£ + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + Àµµ¬É½¸½¤Çʸ»úÎó¤ò¸¡º÷¤·¡¢¸¡º÷·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á°ÌÃÖ (p - str >= 0) + ¸¡º÷¼ºÇÔ: ONIG_MISMATCH (< 0) + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 str: ¸¡º÷ÂоÝʸ»úÎó + 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹ + 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹ + Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ < range) + ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ <= start) + 6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë) + 7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) + + ʸ»úÎó¤Î»ØÄê°ÌÃ֤ǥޥåÁ¥ó¥°¤ò¹Ô¤¤¡¢·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á¤·¤¿¥Ð¥¤¥ÈĹ (>= 0) + not match: ONIG_MISMATCH ( < 0) + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 str: ¸¡º÷ÂоÝʸ»úÎó + 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 at: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷¥¢¥É¥ì¥¹ + 5 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë) + 6 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó + + ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤ + ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë + + 
+# OnigRegion* onig_region_new(void) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òºîÀ®¤¹¤ë¡£ + + +# void onig_region_free(OnigRegion* region, int free_self) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤Ç»ÈÍѤµ¤ì¤Æ¤¤¤ë¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊ󥪥֥¸¥§¥¯¥È + 2 free_self: [1: region¼«¿È¤ò´Þ¤á¤ÆÁ´¤Æ²òÊü, 0: region¼«¿È¤Ï²òÊü¤·¤Ê¤¤] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òÊ£À½¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂоÝÎΰè + 2 from: ¸µÎΰè + + +# void onig_region_clear(OnigRegion* region) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÃæÌ£¤ò¥¯¥ê¥¢¤¹¤ë¡£ + + °ú¿ô + 1 region: ÂоÝÎΰè + + +# int onig_region_resize(OnigRegion* region, int n) + + ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¿ô¤òÊѹ¹¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 region: ÂоÝÎΰè + 2 n: ¿·¤·¤¤¥µ¥¤¥º + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + »ØÄꤷ¤¿Ì¾Á°¤ËÂФ¹¤ë̾Á°ÉÕ¤Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤Î + ¥°¥ë¡¼¥×ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¡£ + ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤Ï¡¢(?<name>....)¤Ë¤è¤Ã¤ÆÄêµÁ¤Ç¤¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: »ØÄꤵ¤ì¤¿Ì¾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¿ô + (Îã /(?<x>..)(?<x>..)/ ==> 2) + ̾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¤¬Â¸ºß¤·¤Ê¤¤: -1 + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾ + 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 num_list: ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹ + + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + »ØÄꤵ¤ì¤¿Ì¾Á°¤Î¸åÊý»²¾È(\k<name>)¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤ÎÈÖ¹æ¤òÊÖ¤¹¡£ + ̾Á°¤ËÂФ·¤Æ¡¢Ê£¿ô¤Î¥Þ¥Ã¥ÁÎΰ褬͸ú¤Ç¤¢¤ì¤Ð¡¢¤½¤ÎÃæ¤ÎºÇÂç¤ÎÈÖ¹æ¤òÊÖ¤¹¡£ + ̾Á°¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç¤¬°ì¸Ä¤·¤«¤Ê¤¤¤È¤¤Ë¤Ï¡¢Âбþ¤¹¤ë¥Þ¥Ã¥ÁÎΰ褬͸ú¤« + ¤É¤¦¤«¤Ë´Ø·¸¤Ê¤¯¡¢¤½¤ÎÈÖ¹æ¤òÊÖ¤¹¡£(½¾¤Ã¤Æ¡¢region¤Ë¤ÏNULL¤òÅϤ·¤Æ¤â¤è¤¤¡£) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ÈÖ¹æ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾ + 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹ + 4 region: search/match·ë²Ì¤Î¥Þ¥Ã¥ÁÎΰè + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*, 
int,int*,regex_t*,void*), + void* arg) + + Á´¤Æ¤Î̾Á°¤ËÂФ·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¸Æ¤Ó½Ð¤·¤ò¼Â¹Ô¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0 + ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + 2 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô + func(name, name_end, <number of groups>, <group number's list>, + reg, arg); + + func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î¥³¡¼¥ë¥Ð¥Ã¥¯¤Ï¹Ô¤Ê¤ï¤º¤Ë + ½ªÎ»¤¹¤ë¡£ + + 3 arg: func¤ËÂФ¹¤ëÄɲðú¿ô + + +# int onig_number_of_names(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Ì¾Á°¤Î¿ô¤òÊÖ¤¹¡£ + °ì¸Ä¤Î̾Á°¤Î¿½ÅÄêµÁ¤Ï°ì¸Ä¤È´ÇÐö¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigAmbigType onig_get_ambig_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# int onig_number_of_captures(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³Í¥°¥ë¡¼¥×¤Î¿ô¤òÊÖ¤¹¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# int onig_number_of_capture_histories(regex_t* reg) + + ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³ÍÍúÎò(?@...)¤Î¿ô¤òÊÖ¤¹¡£ + + »ÈÍѤ¹¤ëʸˡ¤ÇÊá³ÍÍúÎòµ¡Ç½¤¬Í¸ú(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY) + ¤Ç¤Ê¤±¤ì¤Ð¡¢Êá³ÍÍúÎòµ¡Ç½¤Ï»ÈÍѤǤ¤Ê¤¤¡£ + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + Êá³ÍÍúÎò¥Ç¡¼¥¿¤Î¥ë¡¼¥È¥Î¡¼¥É¤òÊÖ¤¹¡£ + + ¥Þ¥Ã¥Á¤¬¼ºÇÔ¤·¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢¤³¤ÎÃͤÏÉÔÄê¤Ç¤¢¤ë¡£ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + Êá³ÍÍúÎò¥Ç¡¼¥¿ÌÚ¤ò½ä²ó¤·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0 + ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ + + °ú¿ô + 1 region: ¥Þ¥Ã¥ÁÎΰè + 2 at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤ò¹Ô¤Ê¤¦¥¿¥¤¥ß¥ó¥° + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: + ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó + ONIG_TRAVERSE_CALLBACK_AT_LAST: + »Ò¥Î¡¼¥É¤ò½ä²ó¤·¤Æ¡¢¥³¡¼¥ë¥Ð¥Ã¥¯ + ONIG_TRAVERSE_CALLBACK_AT_BOTH: + ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó¡¢ºÇ¸å¤Ë¤â¤¦°ìÅÙ¥³¡¼¥ë¥Ð¥Ã¥¯ + 
+ 3 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô + func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î½ä²ó¤Ï¹Ô¤Ê¤ï¤º¤Ë + ½ªÎ»¤¹¤ë¡£ + + int func(int group, int beg, int end, int level, int at, + void* arg) + group: ¥°¥ë¡¼¥×ÈÖ¹æ + beg: ¥Þ¥Ã¥Á³«»Ï°ÌÃÖ + end ¥Þ¥Ã¥Á½ªÎ»°ÌÃÖ + level: ¥Í¥¹¥È¥ì¥Ù¥ë (0¤«¤é) + at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤¬¸Æ¤Ó½Ð¤µ¤ì¤¿¥¿¥¤¥ß¥ó¥° + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: Äɲðú¿ô + + 4 arg; func¤ËÂФ¹¤ëÄɲðú¿ô + + +# int onig_noname_group_capture_is_active(regex_t* reg) + + ̾Á°¤Ê¤·¼°½¸¹ç¤ÎÊá³Íµ¡Ç½¤¬Í¸ú¤«¤É¤¦¤«¤òÊÖ¤¹¡£ + + ͸ú: 1 + ̵¸ú: 0 + + °ú¿ô + 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + + + ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_DONT_CAPTURE_GROUP¤¬ON --> ̵¸ú + + ¥Ñ¥¿¡¼¥ó¤¬Ì¾Á°¤Ä¤¼°½¸¹ç¤ò»ÈÍѤ·¤Æ¤¤¤ë + AND »ÈÍÑʸˡ¤Ç¡¢ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP¤¬ON + AND ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_CAPTURE_GROUP¤¬OFF + --> ̵¸ú + + ¾åµ°Ê³°¤Î¾ì¹ç --> ͸ú + + +# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + ʸ»ú°ì¸ÄʬÁ°¤Îʸ»úÎó°ÌÃÖ¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ëº¸Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ë±¦Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£ + + °ú¿ô + 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹ + 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + ʸ»úÎó¤Îʸ»ú¿ô¤òÊÖ¤¹¡£ + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + ʸ»úÎó¤Î¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£ + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + ¥Ç¥Õ¥©¥ë¥È¤ÎÀµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥»¥Ã¥È¤¹¤ë¡£ + + °ú¿ô + 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ + + +# void 
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥³¥Ô¡¼¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂÐ¾Ý + 2 from: ¸µ + + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÎÍ×ÁǤò»²¾È/¼èÆÀ¤¹¤ë¡£ + + °ú¿ô + 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ + 2 op, op2, behavior, options: Í×ÁǤÎÃÍ + + +# void onig_copy_encoding(OnigEncoding to, OnigOnigEncoding from) + + ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò¥³¥Ô¡¼¤¹¤ë¡£ + + °ú¿ô + 1 to: ÂÐ¾Ý + 2 from: ¸µ + + +# int onig_set_meta_char(OnigEncoding enc, unsigned int what, + OnigCodePoint code) + + ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£ + ONIG_SYN_OP_VARIABLE_META_CHARACTERS¤¬Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤Ç͸ú¤Ë + ¤Ê¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢¥¨¥¹¥±¡¼¥×ʸ»ú¤ò½ü¤¤¤Æ¡¢¤³¤³¤Ç»ØÄꤷ¤¿¥á¥¿Ê¸»ú¤Ï + µ¡Ç½¤·¤Ê¤¤¡£(Áȹþ¤ß¤Îʸˡ¤Ç¤Ï͸ú¤Ë¤·¤Æ¤¤¤Ê¤¤¡£) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + °ú¿ô + 1 enc: ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR. 
+ + +# OnigAmbigType onig_get_default_ambig_flag() + + ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£ + + +# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag) + + ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£ + + °ú¿ô + 1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥° + + +# unsigned int onig_get_match_stack_limit_size(void) + + ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤòÊÖ¤¹¡£ + (¥Ç¥Õ¥©¥ë¥È: 0 == ̵À©¸Â) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤò»ØÄꤹ¤ë¡£ + (size = 0: ̵À©¸Â) + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + +# int onig_end(void) + + ¥é¥¤¥Ö¥é¥ê¤Î»ÈÍѤò½ªÎ»¤¹¤ë¡£ + + Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL + + onig_init()¤òºÆÅٸƤӽФ·¤Æ¤â¡¢°ÊÁ°¤ËºîÀ®¤·¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È + ¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¤Ç¤¤Ê¤¤¡£ + + +# const char* onig_version(void) + + ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8") + +// END diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ new file mode 100644 index 0000000..dccf242 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/FAQ @@ -0,0 +1,37 @@ +FAQ 2006/10/30 + +1. Lognest match + + You can execute longest match by using ONIG_OPTION_FIND_LONGEST option + in onig_new(). + + +2. Thread safe + + In order to make thread safe, which of (A) or (B) must be done. + + (A) Oniguruma Layer + + Define the macro below at NOT_RUBY case in oniguruma/regint.h. + + USE_MULTI_THREAD_SYSTEM + THREAD_ATOMIC_START + THREAD_ATOMIC_END + THREAD_PASS + + THREAD_SYSTEM_INIT + THREAD_SYSTEM_END + + + (B) Application Layer + + The plural threads should not do simultaneously that making + new regexp objects or re-compiling objects or freeing objects, + even if these objects are differ. + + +3. Mailing list + + There is no mailing list about Oniguruma. + +// END diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja new file mode 100644 index 0000000..5582765 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/FAQ.ja @@ -0,0 +1,122 @@ +FAQ 2006/10/30 + +1. 
ºÇĹ¥Þ¥Ã¥Á + + onig_new()¤ÎÃæ¤Ç¡¢ONIG_OPTION_FIND_LONGEST¥ª¥×¥·¥ç¥ó + ¤ò»ÈÍѤ¹¤ì¤ÐºÇĹ¥Þ¥Ã¥Á¤Ë¤Ê¤ë¡£ + + +2. ¥¹¥ì¥Ã¥É¥»¡¼¥Õ + + ¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢°Ê²¼¤Î(A)¤È(B)¤Î¤É¤Á¤é¤«¤ò¹Ô¤Ê¤¨¤Ð + ¤è¤¤¡£ + + (A) Oniguruma Layer + + oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£ + + USE_MULTI_THREAD_SYSTEM + THREAD_ATOMIC_START + THREAD_ATOMIC_END + THREAD_PASS + + ²¿¤é¤«¤Î½é´ü²½/½ªÎ»½èÍý¤¬É¬ÍפǤ¢¤ì¤Ð¡¢°Ê²¼¤Î¥Þ¥¯¥í¤ËÄêµÁ¤¹¤ë¡£ + THREAD_SYSTEM_INIT + THREAD_SYSTEM_END + + + (B) Application Layer + + Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢ + ¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£ + ¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£ + + ¤â¤¦¾¯¤·¾Ü¤·¤¤ÀâÌÀ¤Ï¡¢¤³¤Î¥É¥¥å¥á¥ó¥È¤ÎÃæ¤Î + "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊäÂ"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£ + + +3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È + + µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£ + +//END + + + +¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä + +¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢¸ÄÊ̤Υ¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢ +Oniguruma¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢¤É¤Á¤é¤«¤òÁª¤Ö¤³¤È¤¬¤Ç¤¤Þ¤¹¡£ +(Oniguruma¤ò»ÈÍѤ¹¤ë¦¤ÇÂн褹¤ë¤«¡¢Oniguruma¤ËÂн褵¤»¤ë¤« +¤É¤Á¤é¤«ÊÒÊý¤Ç¹Ô¤¦É¬Íפ¬¤¢¤ë¤È¤¤¤¦¤³¤È¤Ç¤¹¡£) + +¤³¤ì¤é¤ÎÊýË¡¤Ë¤Ä¤¤¤Æ¡¢°Ê²¼(A)¤È(B)¤ÇÀâÌÀ¤·¤Þ¤¹¡£ + +¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ï¡¢¤½¤ì¤¾¤ì¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¤è¤Ã¤Æ¤â +°Û¤Ê¤ê¤Þ¤¹¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤ÎÃæ¤Ç¶ñÂÎŪ¤Ë²¿¤ò¸Æ¤Ö¤Î¤«¤ò +½ñ¤¯¤³¤È¤Ï̵Íý¤Ç¤¹¡£¼ÂºÝ¤Ë»ÈÍѤµ¤ì¤ë¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ç¡¢ +Âбþ¤¹¤ëµ¡Ç½¤Î¤â¤Î¤ò»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£ + +(A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç + +oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç +°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + +USE_MULTI_THREAD_SYSTEM + + ñ¤Ë͸ú¤Ë¤¹¤ì¤Ð¤è¤¤¤Ç¤¹¡£ + +THREAD_ATOMIC_START +THREAD_ATOMIC_END + + THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END¤Ç°Ï¤Þ¤ì¤¿ + ¥×¥í¥°¥é¥à¤Î¥³¡¼¥ÉÉôʬ¤ò¤¢¤ë¥¹¥ì¥Ã¥É¤¬¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î + ¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Ê¤¤¤³¤È¤òÊݾ㤹¤ë¤â¤Î¤ËÄêµÁ + ¤·¤Æ¤¯¤À¤µ¤¤¡£ + (̾Á°¤ÎÄ̤ꡢ°Ï¤Þ¤ì¤¿¥³¡¼¥ÉÉôʬ¤ò¥¹¥ì¥Ã¥É¥¢¥È¥ß¥Ã¥¯¤Ë + ¤¹¤ë¤È¤¤¤¦°ÕÌ£) + +THREAD_PASS + + ¤³¤ì¤ò¼Â¹Ô¤·¤¿¥¹¥ì¥Ã¥É¤«¤é¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤ò°Ñ¾ù + ¤¹¤ë¤â¤Î¤ËÄêµÁ¤ò¤·¤Æ¤¯¤À¤µ¤¤¡£(ºÆ¥¹¥±¥¸¥å¡¼¥ë¤ò¸Æ¤Ó½Ð¤¹ + ¤È¤¤¤¦°ÕÌ£) + 
Âбþ¤¹¤ëµ¡Ç½¤¬Á´¤¯¤Ê¤±¤ì¤Ð¡¢¶õÄêµÁ¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£ + +(»²¹ÍÎã) +Ruby¤Î¾ì¹ç¤òÎã¤Ë¤¹¤ë¤È¡¢ +Ruby¤Ï¼«Ê¬¼«¿È¤ÇÆÈ¼«¤Î¥¹¥ì¥Ã¥Éµ¡Ç½¤ò¼ÂÁõ¤·¤Æ¤¤¤Þ¤¹¡£ +¤½¤Îµ¡Ç½¤ò»ÈÍѤ¹¤ë¤È¡¢°Ê²¼¤Î¤è¤¦¤ËÄêµÁ¤¹¤ì¤Ð¤è¤¤¤³¤È¤Ë +¤Ê¤ê¤Þ¤¹¡£ + +#define USE_MULTI_THREAD_SYSTEM +#define THREAD_SYSTEM_INIT +#define THREAD_SYSTEM_END +#define THREAD_ATOMIC_START DEFER_INTS +#define THREAD_ATOMIC_END ENABLE_INTS +#define THREAD_PASS rb_thread_schedule() + +Ruby¤Î¾ì¹ç¡¢¥¿¥¤¥Þ³ä¤ê¹þ¤ß¤ò»ÈÍѤ·¤Æ¡¢¥¹¥ì¥Ã¥É¤ÎÀÚ¤êÂØ¤¨¤ò +¹Ô¤Ã¤Æ¤¤¤Þ¤¹¡£DEFER_INTS¤Ï³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é¤Î¼Â¹Ô¤ò°ì»þŪ¤Ë +»ß¤á¤ë¤¿¤á¤Î¥Þ¥¯¥í¤Ç¤¹¡£ENABLE_INTS¥Þ¥¯¥í¤Ç³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é +¤Î¼Â¹Ô¤òµö²Ä¤·¤Þ¤¹¡£ +¤³¤ì¤Ë¤è¤Ã¤Æ¡¢THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END +¤Ç°Ï¤Þ¤ì¤¿Éôʬ¤Î¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Þ¤»¤ó¡£ + + +(B) ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç + +°Ê²¼¤òÊݾ㤹¤ë¤è¤¦¤Ë¡¢¥¹¥ì¥Ã¥É¤Î¼Â¹Ô¤òÀ©¸æ¤·¤Æ¤¯¤À¤µ¤¤¡£ + +Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò +¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£ + +onig_new(), onig_new_deluxe(), onig_free()¤Î¤É¤ì¤«¤Î¸Æ¤Ó½Ð¤·¤ò¡¢ +Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤¬Æ±»þ¤Ë¼Â¹Ô¤¹¤ë¤³¤È¤òÈò¤±¤Æ¤¯¤À¤µ¤¤¡£Æ±»þ¤Ç¤Ê¤±¤ì¤ÐÊ̤ˤ«¤Þ¤¤¤Þ¤»¤ó¡£ + +¤³¤ì¤Ï²¿¸ÎɬÍפʤΤ«¤È¤¤¤¦¤È¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë +²áÄø¤Ç¡¢ÆâÉô¤Ç¶¦Ä̤˻²¾È¤¹¤ë¥Æ¡¼¥Ö¥ë¤¬¤¢¤ê¤Þ¤¹¡£ +¤³¤Î¥Æ¡¼¥Ö¥ë¤ËÂФ·¤Æ¤Î¥Ç¡¼¥¿ÅÐÏ¿½èÍý¤¬Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤Ç¾×ÆÍ¤·¤Æ +°Û¾ï¤Ê¾õÂ֤ˤʤé¤Ê¤¤¤¿¤á¤ËɬÍפǤ¹¡£ + +// END diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE new file mode 100644 index 0000000..5a2783d --- /dev/null +++ b/ext/mbstring/oniguruma/doc/RE @@ -0,0 +1,412 @@ +Oniguruma Regular Expressions Version 4.3.0 2006/08/17 + +syntax: ONIG_SYNTAX_RUBY (default) + + +1. Syntax elements + + \ escape (enable or disable meta character meaning) + | alternation + (...) group + [...] character class + + +2. 
Characters + + \t horizontal tab (0x09) + \v vertical tab (0x0B) + \n newline (0x0A) + \r return (0x0D) + \b back space (0x08) + \f form feed (0x0C) + \a bell (0x07) + \e escape (0x1B) + \nnn octal char (encoded byte value) + \xHH hexadecimal char (encoded byte value) + \x{7HHHHHHH} wide hexadecimal char (character code point value) + \cx control char (character code point value) + \C-x control char (character code point value) + \M-x meta (x|0x80) (character code point value) + \M-\C-x meta control char (character code point value) + + (* \b is effective in character class [...] only) + + +3. Character types + + . any character (except newline) + + \w word character + + Not Unicode: + alphanumeric, "_" and multibyte char. + + Unicode: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W non word char + + \s whitespace char + + Not Unicode: + \t, \n, \v, \f, \r, \x20 + + Unicode: + 0009, 000A, 000B, 000C, 000D, 0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S non whitespace char + + \d decimal digit char + + Unicode: General_Category -- Decimal_Number + + \D non decimal digit char + + \h hexadecimal digit char [0-9a-fA-F] + + \H non hexadecimal digit char + + +4. Quantifier + + greedy + + ? 1 or 0 times + * 0 or more times + + 1 or more times + {n,m} at least n but not more than m times + {n,} at least n times + {,n} at least 0 but not more than n times ({0,n}) + {n} n times + + reluctant + + ?? 1 or 0 times + *? 0 or more times + +? 1 or more times + {n,m}? at least n but not more than m times + {n,}? at least n times + {,n}? at least 0 but not more than n times (== {0,n}?) + + possessive (greedy and does not backtrack after repeated) + + ?+ 1 or 0 times + *+ 0 or more times + ++ 1 or more times + + ({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA only) + + ex. /a*+/ === /(?>a*)/ + + +5. 
Anchors + + ^ beginning of the line + $ end of the line + \b word boundary + \B not word boundary + \A beginning of string + \Z end of string, or before newline at the end + \z end of string + \G matching start position (*) + + * Ruby Regexp: + previous end-of-match position + (This specification is not related to this library.) + + +6. Character class + + ^... negative class (lowest precedence operator) + x-y range from x to y + [...] set (character class in character class) + ..&&.. intersection (low precedence at the next of ^) + + ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w] + + * If you want to use '[', '-', ']' as a normal character + in a character class, you should escape these characters by '\'. + + + POSIX bracket ([:xxxxx:], negate [:^xxxxx:]) + + Not Unicode Case: + + alnum alphabet or digit char + alpha alphabet + ascii code value: [0 - 127] + blank \t, \x20 + cntrl + digit 0-9 + graph include all of multibyte encoded characters + lower + print include all of multibyte encoded characters + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + + + Unicode Case: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + 0009 | 000A | 000B | 000C | 000D | 0085 + upper Uppercase_Letter + xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + (0-9, a-f, A-F) + + +7. Extended groups + + (?#...) comment + + (?imx-imx) option on/off + i: ignore case + m: multi-line (dot(.) 
match newline) + x: extended form + (?imx-imx:subexp) option on/off for subexp + + (?:subexp) not captured group + (subexp) captured group + + (?=subexp) look-ahead + (?!subexp) negative look-ahead + (?<=subexp) look-behind + (?<!subexp) negative look-behind + + Subexp of look-behind must be fixed character length. + But different character length is allowed in top level + alternatives only. + ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed. + + In negative-look-behind, captured group isn't allowed, + but shy group(?:) is allowed. + + (?>subexp) atomic group + don't backtrack in subexp. + + (?<name>subexp) define named group + (All characters of the name must be a word character. + And first character must not be a digit or upper case) + + Not only a name but a number is assigned like a captured + group. + + Assigning the same name as two or more subexps is allowed. + In this case, a subexp call can not be performed although + the back reference is possible. + + +8. Back reference + + \n back reference by group number (n >= 1) + \k<name> back reference by group name + + In the back reference by the multiplex definition name, + a subexp with a large number is referred to preferentially. + (When not matched, a group of the small number is referred to.) + + * Back reference by group number is forbidden if named group is defined + in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set. + + + back reference with nest level + + (This function is disabled in Ruby 1.9.) + + \k<name+n> n: 0, 1, 2, ... + \k<name-n> n: 0, 1, 2, ... + + Designate relative nest level from back reference position. + + ex 1. + + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") + + ex 2. 
+ + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (?<element> \g<stag> \g<content>* \g<etag> ){0} + (?<stag> < \g<name> \s* > ){0} + (?<name> [a-zA-Z_:]+ ){0} + (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} + (?<etag> </ \k<name+1> >){0} + \g<element> + __REGEXP__ + + p r.match('<foo>f<bar>bbb</bar>f</foo>').captures + + + +9. Subexp call ("Tanaka Akira special") + + \g<name> call by group name + \g<n> call by group number (n >= 1) + + * left-most recursive call is not allowed. + ex. (?<name>a|\g<name>b) => error + (?<name>a|b\g<name>c) => OK + + * Call by group number is forbidden if named group is defined in the pattern + and ONIG_OPTION_CAPTURE_GROUP is not set. + + * If the option status of called group is different from calling position + then the group's option is effective. + + ex. (?-i:\g<name>)(?i:(?<name>a)){0} match to "A" + + +10. Captured group + + Behavior of the no-named group (...) changes with the following conditions. + (But named group is not changed.) + + case 1. /.../ (named group is not used, no option) + + (...) is treated as a captured group. + + case 2. /.../g (named group is not used, 'g' option) + + (...) is treated as a no-captured group (?:...). + + case 3. /..(?<name>..)../ (named group is used, no option) + + (...) is treated as a no-captured group (?:...). + numbered-backref/call is not allowed. + + case 4. /..(?<name>..)../G (named group is used, 'G' option) + + (...) is treated as a captured group. + numbered-backref/call is allowed. + + where + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + + ('g' and 'G' options are argued in ruby-dev ML) + + These options are not implemented in Ruby level. + + +----------------------------- +A-1. Syntax dependent options + + + ONIG_SYNTAX_RUBY + (?m): dot(.) match newline + + + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA + (?s): dot(.) match newline + (?m): ^ match after newline, $ match before newline + + +A-2. 
Original extensions + + + hexadecimal digit char type \h, \H + + named group (?<name>...) + + named backref \k<name> + + subexp call \g<name>, \g<group-num> + + +A-3. Features lacking compared with Perl 5.8.0 + + + [:word:] + + \N{name} + + \l,\u,\L,\U, \X, \C + + (?{code}) + + (??{code}) + + (?(condition)yes-pat|no-pat) + + * \Q...\E + This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. + + * \p{property}, \P{property} + This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, ASCII are supported. + + Prefix 'Is' of property name is allowed in ONIG_SYNTAX_PERL only. + ex. \p{IsXDigit}. + + Negation operator of property is supported in ONIG_SYNTAX_PERL only. + \p{^...}, \P{^...} + + +A-4. Differences with Japanized GNU regex(version 0.12) of Ruby + + + add hexadecimal digit char type (\h, \H) + + add look-behind + (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern) + + add possessive quantifier. ?+, *+, ++ + + add operations in character class. [], && + ('[' must be escaped as a usual char in character class.) + + add named group and subexp call. + + octal or hexadecimal number sequence can be treated as + a multibyte code char in character class if multibyte encoding + is specified. + (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + allow the range of single byte char and multibyte char in character + class. + ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding. + + effect range of isolated option is to next ')'. + ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b). + + isolated option is not transparent to previous pattern. + ex. a(?i)* is a syntax error pattern. + + allowed incomplete left brace as a usual string. + ex. /{/, /({)/, /a{2,3/ etc... + + negative POSIX bracket [:^xxxx:] is supported. + + POSIX bracket [:ascii:] is added. + + repeat of look-ahead is not allowed. + ex. 
/(?=a)*/, /(?!b){5}/ + + Ignore case option is effective to numbered character. + ex. /\x61/i =~ "A" + + In the range quantifier, the number of the minimum is omissible. + /a{,n}/ == /a{0,n}/ + The simultanious abbreviation of the number of times of the minimum + and the maximum is not allowed. (/a{,}/) + + /a{n}?/ is not a non-greedy operator. + /a{n}?/ == /(?:a{n})?/ + + invalid back reference is checked and cause error. + /\1/, /(a)\2/ + + Zero-length match in infinite repeat stops the repeat, + then changes of the capture group status are checked as stop condition. + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + + +A-5. Disabled functions by default syntax + + + capture history + + (?@...) and (?@<name>...) + + ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] + + see sample/listcap.c file. + + +A-6. Problems + + + Invalid encoding byte sequence is not checked in UTF-8. + + * Invalid first byte is treated as a character. + /./u =~ "\xa3" + + * Incomplete byte sequence is not checked. + /\w+/ =~ "a\xf3\x8ec" + +// END diff --git a/ext/mbstring/oniguruma/doc/RE.ja b/ext/mbstring/oniguruma/doc/RE.ja new file mode 100644 index 0000000..5168171 --- /dev/null +++ b/ext/mbstring/oniguruma/doc/RE.ja @@ -0,0 +1,424 @@ +µ´¼Ö Àµµ¬É½¸½ Version 4.3.0 2006/08/17 + +»ÈÍÑʸˡ: ONIG_SYNTAX_RUBY (´ûÄêÃÍ) + + +1. ´ðËÜÍ×ÁÇ + + \ ÂàÈò½¤¾þ (¥¨¥¹¥±¡¼¥×) Àµµ¬É½¸½µ¹æ¤Î͸ú/̵¸ú¤ÎÀ©¸æ + | ÁªÂò»Ò + (...) ¼°½¸¹ç (¥°¥ë¡¼¥×) + [...] ʸ»ú½¸¹ç (ʸ»ú¥¯¥é¥¹) + + +2. ʸ»ú + + \t ¿åÊ¿¥¿¥Ö (0x09) + \v ¿âľ¥¿¥Ö (0x0B) + \n ²þ¹Ô (0x0A) + \r Éüµ¢ (0x0D) + \b ¸åÂà¶õÇò (0x08) + \f ²þÊÇ (0x0C) + \a ¾â (0x07) + \e ÂàÈò½¤¾þ (0x1B) + \nnn Ȭ¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \xHH ½½Ï»¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô) + \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \cx À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \C-x À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + \M-\C-x Ķ + À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ + + ¢¨ \b¤Ï¡¢Ê¸»ú½¸¹çÆâ¤Ç¤Î¤ß͸ú + + +3. ʸ»ú¼ï + + . 
Ǥ°Õʸ»ú (²þ¹Ô¤ò½ü¤¯) + + \w ñ¸ì¹½À®Ê¸»ú + + Unicode°Ê³°¤Î¾ì¹ç: + ±Ñ¿ô»ú, "_" ¤ª¤è¤Ó ¿¥Ð¥¤¥Èʸ»ú¡£ + + Unicode¤Î¾ì¹ç: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W Èóñ¸ì¹½À®Ê¸»ú + + \s ¶õÇòʸ»ú + + Unicode°Ê³°¤Î¾ì¹ç: + \t, \n, \v, \f, \r, \x20 + + Unicode¤Î¾ì¹ç: + 0009, 000A, 000B, 000C, 000D, 0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S Èó¶õÇòʸ»ú + + \d 10¿Ê¿ô»ú + + Unicode¤Î¾ì¹ç: General_Category -- Decimal_Number + + \D Èó10¿Ê¿ô»ú + + \h 16¿Ê¿ô»ú [0-9a-fA-F] + + \H Èó16¿Ê¿ô»ú + + + +4. ÎÌ»ØÄê»Ò + + ÍßÄ¥¤ê + + ? °ì²ó¤Þ¤¿¤ÏÎí²ó + * Îí²ó°Ê¾å + + °ì²ó°Ê¾å + {n,m} n²ó°Ê¾åm²ó°Ê²¼ + {n,} n²ó°Ê¾å + {,n} Îí²ó°Ê¾ån²ó°Ê²¼ ({0,n}) + {n} n²ó + + ̵Íß + + ?? °ì²ó¤Þ¤¿¤ÏÎí²ó + *? Îí²ó°Ê¾å + +? °ì²ó°Ê¾å + {n,m}? n²ó°Ê¾åm²ó°Ê²¼ + {n,}? n²ó°Ê¾å + {,n}? Îí²ó°Ê¾ån²ó°Ê²¼ (== {0,n}?) + + ¶¯Íß (ÍßÄ¥¤ê¤Ç¡¢·«¤êÊÖ¤·¤ËÀ®¸ù¤·¤¿¸å¤Ï²ó¿ô¤ò¸º¤é¤¹¤è¤¦¤Ê¸åÂàºÆ»î¹Ô¤ò¤·¤Ê¤¤) + + ?+ °ì²ó¤Þ¤¿¤ÏÎí²ó + *+ Îí²ó°Ê¾å + ++ °ì²ó°Ê¾å + + ({n,m}+, {n,}+, {n}+ ¤Ï¡¢ONIG_SYNTAX_JAVA¤Ç¤Î¤ß¶¯ÍߤʻØÄê»Ò) + + Îã. /a*+/ === /(?>a*)/ + + +5. ÉÅ + + ^ ¹ÔƬ + $ ¹ÔËö + \b ñ¸ì¶³¦ + \B Èóñ¸ì¶³¦ + \A ʸ»úÎóÀèÆ¬ + \Z ʸ»úÎóËöÈø¡¢¤Þ¤¿¤Ïʸ»úÎóËöÈø¤Î²þ¹Ô¤ÎľÁ° + \z ʸ»úÎóËöÈø + \G ¾È¹ç³«»Ï°ÌÃÖ(*) + + * Ruby Regexp: + Á°²ó¾È¹çÀ®¸ùËöÈø°ÌÃÖ + (¤³¤Î»ÅÍͤÏRuby¤Î¼ÂÁõ¤Ë´Ø¤¹¤ë¤â¤Î¤Ç¤¢¤ê¡¢ + Àµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤È¤Ï̵´Ø·¸) + + +6. ʸ»ú½¸¹ç + + ^... ÈÝÄê (ºÇÄãÍ¥ÀèÅٱ黻»Ò) + x-y ÈÏ°Ï (x¤«¤éy¤Þ¤Ç) + [...] ½¸¹ç (ʸ»ú½¸¹çÆâʸ»ú½¸¹ç) + ..&&.. Àѱ黻 (^¤Î¼¡¤ËÍ¥ÀèÅÙ¤¬Ä㤤±é»»»Ò) + + Îã. 
[a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] + + ¢¨ '[', '-', ']'¤ò¡¢Ê¸»ú½¸¹çÆâ¤ÇÄ̾ïʸ»ú¤Î°ÕÌ£¤Ç»ÈÍѤ·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¤³¤ì¤é¤Îʸ»ú¤ò'\'¤ÇÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ + + + POSIX¥Ö¥é¥±¥Ã¥È ([:xxxxx:], ÈÝÄê [:^xxxxx:]) + + Unicode°Ê³°¤Î¾ì¹ç: + + alnum ±Ñ¿ô»ú + alpha ±Ñ»ú + ascii 0 - 127 + blank \t, \x20 + cntrl + digit 0-9 + graph ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à + lower + print ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + + Unicode¤Î¾ì¹ç: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + 0009 | 000A | 000B | 000C | 000D | 0085 + upper Uppercase_Letter + xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + (0-9, a-f, A-F) + + +7. ³ÈÄ¥¼°½¸¹ç + + (?#...) Ãí¼á + (?imx-imx) ¸ÉΩ¥ª¥×¥·¥ç¥ó + i: Âçʸ»ú¾®Ê¸»ú¾È¹ç + m: Ê£¿ô¹Ô + x: ³ÈÄ¥·Á¼° + (?imx-imx:¼°) ¼°¥ª¥×¥·¥ç¥ó + + (¼°) Êá³Í¼°½¸¹ç + (?:¼°) ÈóÊá³Í¼°½¸¹ç + + (?=¼°) ÀèÆÉ¤ß + (?!¼°) ÈÝÄêÀèÆÉ¤ß + (?<=¼°) Ìá¤êÆÉ¤ß + (?<!¼°) ÈÝÄêÌá¤êÆÉ¤ß + + Ìá¤êÆÉ¤ß¤Î¼°¤Ï¸ÇÄêʸ»úĹ¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ + ¤·¤«¤·¡¢ºÇ¾å°Ì¤ÎÁªÂò»Ò¤À¤±¤Ï°Û¤Ê¤Ã¤¿Ê¸»úŤ¬µö¤µ¤ì¤ë¡£ + Îã. (?<=a|bc) ¤Ïµö²Ä. (?<=aaa(?:b|cd)) ¤ÏÉÔµö²Ä + + ÈÝÄêÌá¤êÆÉ¤ß¤Ç¤Ï¡¢Êá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤Ê¤¤¤¬¡¢ + ÈóÊá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤ë¡£ + + (?>¼°) ¸¶»ÒŪ¼°½¸¹ç + ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤ + + (?<name>¼°) ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç + ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£ + (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï + ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£) + + ̾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£ + ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. 
Êá³Í¼°½¸¹ç ¤ò»²¾È) + ¤Î¤È¤¤Ï¡¢Ì¾Á°¤ò»È¤ï¤Ê¤¤¤ÇÈÖ¹æ¤Ç¤â»²¾È¤Ç¤¤ë¡£ + + Ê£¿ô¤Î¼°½¸¹ç¤ËƱ¤¸Ì¾Á°¤òÍ¿¤¨¤ë¤³¤È¤Ïµö¤µ¤ì¤Æ¤¤¤ë¡£ + ¤³¤Î¾ì¹ç¤Ë¤Ï¡¢¤³¤Î̾Á°¤ò»ÈÍѤ·¤¿¸åÊý»²¾È¤Ï²Äǽ¤Ç¤¢¤ë¤¬¡¢ + Éôʬ¼°¸Æ½Ð¤·¤Ï¤Ç¤¤Ê¤¤¡£ + + +8. ¸åÊý»²¾È + + \n ÈÖ¹æ»ØÄ껲¾È (n >= 1) + \k<name> ̾Á°»ØÄ껲¾È + + ̾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢ + ÈÖ¹æ¤ÎÂ礤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£ + (¥Þ¥Ã¥Á¤·¤Ê¤¤¤È¤¤Ë¤ÏÈÖ¹æ¤Î¾®¤µ¤¤¼°½¸¹ç¤¬»²¾È¤µ¤ì¤ë) + + ¢¨ ÈÖ¹æ»ØÄ껲¾È¤Ï¡¢Ì¾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢ + ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¶Ø»ß¤µ¤ì¤ë¡£(10. Êá³Í¼°½¸¹ç ¤ò»²¾È) + + + ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤¸åÊý»²¾È + + ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£ + + \k<name+n> n: 0, 1, 2, ... + \k<name-n> n: 0, 1, 2, ... + + ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î + Êá³ÍÃͤò»²¾È¤¹¤ë¡£ + + Îã-1. + + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") + + Îã-2. + + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (?<element> \g<stag> \g<content>* \g<etag> ){0} + (?<stag> < \g<name> \s* > ){0} + (?<name> [a-zA-Z_:]+ ){0} + (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} + (?<etag> </ \k<name+1> >){0} + \g<element> + __REGEXP__ + + p r.match('<foo>f<bar>bbb</bar>f</foo>').captures + + + +9. Éôʬ¼°¸Æ½Ð¤· ("ÅÄÃæÅ¯¥¹¥Ú¥·¥ã¥ë") + + \g<name> ̾Á°»ØÄê¸Æ½Ð¤· + \g<n> ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1) + + ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£ + Îã. (?<name>a|\g<name>b) => error + (?<name>a|b\g<name>c) => OK + + ¢¨ ÈÖ¹æ»ØÄê¸Æ½Ð¤·¤Ï¡¢Ì¾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢ + ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢ + ¶Ø»ß¤µ¤ì¤ë¡£ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È) + + ¢¨ ¸Æ¤Ó½Ð¤µ¤ì¤¿¼°½¸¹ç¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬¸Æ½Ð¤·Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂ֤ȰۤʤäƤ¤¤ë + ¤È¤¡¢¸Æ¤Ó½Ð¤µ¤ì¤¿Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬Í¸ú¤Ç¤¢¤ë¡£ + + Îã. (?-i:\g<name>)(?i:(?<name>a)){0} ¤Ï "A" ¤Ë¾È¹çÀ®¸ù¤¹¤ë¡£ + + +10. Êá³Í¼°½¸¹ç + + Êá³Í¼°½¸¹ç(...)¤Ï¡¢°Ê²¼¤Î¾ò·ï¤Ë±þ¤¸¤Æ¿¶Éñ¤¬ÊѲ½¤¹¤ë¡£ + (̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤ÏÊѲ½¤·¤Ê¤¤) + + case 1. /.../ (̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·) + + (...) 
¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + + case 2. /.../g (̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'g'¤ò»ØÄê) + + (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + + case 3. /..(?<name>..)../ (̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·) + + (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤ÏÉÔµö²Ä¡£ + + case 4. /..(?<name>..)../G (̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'G'¤ò»ØÄê) + + (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£ + ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤Ïµö²Ä¡£ + + ⤷ + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + ('g'¤È'G'¥ª¥×¥·¥ç¥ó¤Ï¡¢ruby-dev ML¤ÇµÄÏÀ¤µ¤ì¤¿¡£) + + ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢ + ̾Á°ÉÕ¤Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦ + ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£ + ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£ + + +----------------------------- +Êäµ 1. ʸˡ°Í¸¥ª¥×¥·¥ç¥ó + + + ONIG_SYNTAX_RUBY + (?m): ½ª»ßÉäµ¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù + + + ONIG_SYNTAX_PERL ¤È ONIG_SYNTAX_JAVA + (?s): ½ª»ßÉäµ¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù + (?m): ^ ¤Ï²þ¹Ô¤Îľ¸å¤Ë¾È¹ç¤¹¤ë¡¢$ ¤Ï²þ¹Ô¤ÎľÁ°¤Ë¾È¹ç¤¹¤ë + + +Êäµ 2. ÆÈ¼«³ÈÄ¥µ¡Ç½ + + + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú \h, \H + + ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç (?<name>...) + + ̾Á°»ØÄê¸åÊý»²¾È \k<name> + + Éôʬ¼°¸Æ½Ð¤· \g<name>, \g<group-num> + + +Êäµ 3. Perl 5.8.0¤ÈÈæ³Ó¤·¤ÆÂ¸ºß¤·¤Ê¤¤µ¡Ç½ + + + [:word:] + + \N{name} + + \l,\u,\L,\U, \X, \C + + (?{code}) + + (??{code}) + + (?(condition)yes-pat|no-pat) + + * \Q...\E + ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤Ï͸ú + + * \p{property}, \P{property} + ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤Ï͸ú + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤¤ë¡£ + + ÆÃÀ̾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß + µö¤µ¤ì¤Æ¤¤¤ë¡£ + ex. \p{IsXDigit}. + + ÆÃÀ¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£ + \p{^...}, \P{^...} + + +Êäµ 4. 
Ruby¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤ + + + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà (\h, \H) + + Ìá¤êÆÉ¤ßµ¡Ç½¤òÄɲà + + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà (?+, *+, ++) + + ʸ»ú½¸¹ç¤ÎÃæ¤Î±é»»»Ò¤òÄɲà ([...], &&) + ('[' ¤Ï¡¢Ê¸»ú½¸¹ç¤ÎÃæ¤ÇÄ̾ï¤Îʸ»ú¤È¤·¤Æ»ÈÍѤ¹¤ë¤È¤¤Ë¤Ï + ÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤) + + ̾Á°ÉÕ¤Êá³Í¼°½¸¹ç¤È¡¢Éôʬ¼°¸Æ½Ð¤·µ¡Ç½Äɲà + + ¿¥Ð¥¤¥Èʸ»ú¥³¡¼¥É¤¬»ØÄꤵ¤ì¤Æ¤¤¤ë¤È¤¡¢ + ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹ç¤Çɽ¸½¤µ¤ì¤¿ + °ì¸Ä¤Îʸ»ú¤È²ò¼á¤µ¤ì¤ë + (Îã. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + ʸ»ú½¸¹ç¤ÎÃæ¤Ç¡¢°ì¥Ð¥¤¥Èʸ»ú¤È¿¥Ð¥¤¥Èʸ»ú¤ÎÈϰϻØÄê¤Ïµö¤µ¤ì¤ë¡£ + ex. /[a-¤¢]/ + + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤Î͸úÈϰϤϡ¢¤½¤Î¸ÉΩ¥ª¥×¥·¥ç¥ó¤ò´Þ¤ó¤Ç¤¤¤ë¼°½¸¹ç¤Î + ½ª¤ï¤ê¤Þ¤Ç¤Ç¤¢¤ë + Îã. (?:(?i)a|b) ¤Ï (?:(?i:a|b)) ¤È²ò¼á¤µ¤ì¤ë¡¢(?:(?i:a)|b)¤Ç¤Ï¤Ê¤¤ + + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤Ï¤½¤ÎÁ°¤Î¼°¤ËÂФ·¤ÆÆ©²áŪ¤Ç¤Ï¤Ê¤¤ + Îã. /a(?i)*/ ¤Ïʸˡ¥¨¥é¡¼¤È¤Ê¤ë + + ÉÔ´°Á´¤Ê·«¤êÊÖ¤·ÈϰϻØÄê»Ò¤ÏÄ̾ï¤Îʸ»úÎó¤È¤·¤Æµö²Ä¤µ¤ì¤ë + Îã. /{/, /({)/, /a{2,3/ + + ÈÝÄêŪPOSIX¥Ö¥é¥±¥Ã¥È [:^xxxx:] ¤òÄɲà + + POSIX¥Ö¥é¥±¥Ã¥È [:ascii:] ¤òÄɲà + + ÀèÆÉ¤ß¤Î·«¤êÊÖ¤·¤ÏÉÔµö²Ä + Îã. /(?=a)*/, /(?!b){5}/ + + ¿ôÃͤǻØÄꤵ¤ì¤¿Ê¸»ú¤ËÂФ·¤Æ¤â¡¢Âçʸ»ú¾®Ê¸»ú¾È¹ç¥ª¥×¥·¥ç¥ó¤Ï͸ú + Îã. /\x61/i =~ "A" + + ·«¤êÊÖ¤·²ó¿ô»ØÄê¤Ç¡¢ºÇÄã²ó¿ô¤Î¾Êά(0²ó)¤¬¤Ç¤¤ë + /a{,n}/ == /a{0,n}/ + ºÇÄã²ó¿ô¤ÈºÇÂç²ó¿ô¤ÎƱ»þ¾Êά¤Ïµö¤µ¤ì¤Ê¤¤¡£(/a{,}/) + + /a{n}?/¤Ï̵Íߤʱ黻»Ò¤Ç¤Ï¤Ê¤¤¡£ + /a{n}?/ == /(?:a{n})?/ + + ̵¸ú¤Ê¸åÊý»²¾È¤ò¥Á¥§¥Ã¥¯¤·¤Æ¥¨¥é¡¼¤Ë¤¹¤ë¡£ + /\1/, /(a)\2/ + + ̵¸Â·«¤êÊÖ¤·¤ÎÃæ¤Ç¡¢Ä¹¤µÎí¤Ç¤Î¾È¹çÀ®¸ù¤Ï·«¤êÊÖ¤·¤òÃæÃǤµ¤»¤ë¤¬¡¢ + ¤³¤Î¤È¤¡¢ÃæÃǤ¹¤Ù¤¤«¤É¤¦¤«¤ÎȽÄê¤È¤·¤Æ¡¢Êá³Í¼°½¸¹ç¤ÎÊá³Í¾õÂ֤Π+ ÊѲ½¤Þ¤Ç¹Íθ¤·¤Æ¤¤¤ë + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + + + +Êäµ 5. ¼ÂÁõ¤µ¤ì¤Æ¤¤¤ë¤¬¡¢´ûÄêÃͤǤÏ͸ú¤Ë¤·¤Æ¤¤¤Ê¤¤µ¡Ç½ + + + Êá³ÍÍúÎò»²¾È + + (?@...) ¤È (?@<name>...) + + Îã. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] + + »ÈÍÑÊýË¡¤Ï¡¢sample/listcap.c¤ò»²¾È + + ͸ú¤Ë¤·¤Æ¤¤¤Ê¤¤Íýͳ¤Ï¡¢¤É¤ÎÄøÅÙÌò¤ËΩ¤Ä¤«¤Ï¤Ã¤¤ê¤·¤Ê¤¤¤¿¤á¡£ + + +Êäµ 6. 
ÌäÂêÅÀ + + + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£ + + * ÀèÆ¬¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹ + /./u =~ "\xa3" + + * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤ + /\w+/ =~ "a\xf3\x8ec" + + ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£ + +½ª¤ê diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c new file mode 100644 index 0000000..64be21d --- /dev/null +++ b/ext/mbstring/oniguruma/enc/ascii.c @@ -0,0 +1,67 @@ +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingASCII = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max byte length */ + 1, /* min byte length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_to_normalize, + onigenc_ascii_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c new file mode 100644 index 0000000..8679266 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/big5.c @@ -0,0 +1,168 @@ +/********************************************************************** + big5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_BIG5[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +big5_mbc_enc_len(const UChar* p) +{ + return EncLen_BIG5[*p]; +} + +static OnigCodePoint +big5_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); +} + +static int +big5_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); +} + +static int +big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag, + pp, end, lower); +} + +static int 
+big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); +} + +static int +big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); +} + +static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) +#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] + +static UChar* +big5_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (BIG5_ISMB_TRAIL(*p)) { + while (p > start) { + if (! BIG5_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enc_len(ONIG_ENCODING_BIG5, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +big5_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + + return (BIG5_ISMB_TRAIL(c) ? 
FALSE : TRUE); +} + +OnigEncodingType OnigEncodingBIG5 = { + big5_mbc_enc_len, + "Big5", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + big5_mbc_to_code, + onigenc_mb2_code_to_mbclen, + big5_code_to_mbc, + big5_mbc_to_normalize, + big5_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + big5_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + big5_left_adjust_char_head, + big5_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c new file mode 100644 index 0000000..71c81ee --- /dev/null +++ b/ext/mbstring/oniguruma/enc/euc_jp.c @@ -0,0 +1,228 @@ +/********************************************************************** + euc_jp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static const int EncLen_EUCJP[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +eucjp_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCJP[*p]; +} + +static OnigCodePoint +eucjp_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_EUC_JP, p); + n = (OnigCodePoint )*p++; + if (len 
== 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +eucjp_code_to_mbclen(OnigCodePoint code) +{ + if (ONIGENC_IS_CODE_ASCII(code)) return 1; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 0; +} + +#if 0 +static int +eucjp_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} +#endif + +static int +eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +static int +eucjp_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + int len; + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp)++; + return 1; + } + else { + len = enc_len(ONIG_ENCODING_EUC_JP, p); + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end); +} + +static int +eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | 
ONIGENC_CTYPE_PRINT)) != 0) { + return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +eucjp_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* In this encoding + mb-trail bytes doesn't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!eucjp_islead(*p) && p > start) p--; + len = enc_len(ONIG_ENCODING_EUC_JP, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + if (c <= 0x7e || c == 0x8e || c == 0x8f) + return TRUE; + else + return FALSE; +} + +OnigEncodingType OnigEncodingEUC_JP = { + eucjp_mbc_enc_len, + "EUC-JP", /* name */ + 3, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + eucjp_mbc_to_code, + eucjp_code_to_mbclen, + eucjp_code_to_mbc, + eucjp_mbc_to_normalize, + eucjp_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + eucjp_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + eucjp_left_adjust_char_head, + eucjp_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c new file mode 100644 index 0000000..57bf801 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/euc_kr.c @@ -0,0 +1,173 @@ +/********************************************************************** + euc_kr.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_EUCKR[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +euckr_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCKR[*p]; +} + +static OnigCodePoint +euckr_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); +} + +static int +euckr_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); +} + +static int +euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag, + pp, end, lower); +} + +static int 
+euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); +} + +static int +euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); +} + +#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) + +static UChar* +euckr_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* Assumed in this encoding, + mb-trail bytes don't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!euckr_islead(*p) && p > start) p--; + len = enc_len(ONIG_ENCODING_EUC_KR, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +euckr_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + if (c <= 0x7e) return TRUE; + else return FALSE; +} + +OnigEncodingType OnigEncodingEUC_KR = { + euckr_mbc_enc_len, + "EUC-KR", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + euckr_mbc_to_code, + onigenc_mb2_code_to_mbclen, + euckr_code_to_mbc, + euckr_mbc_to_normalize, + euckr_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + euckr_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euckr_left_adjust_char_head, + euckr_is_allowed_reverse_match +}; + +/* Same with OnigEncodingEUC_KR except the name */ +OnigEncodingType OnigEncodingEUC_CN = { + euckr_mbc_enc_len, + "EUC-CN", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + euckr_mbc_to_code, + onigenc_mb2_code_to_mbclen, + euckr_code_to_mbc, + euckr_mbc_to_normalize, + euckr_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + euckr_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euckr_left_adjust_char_head, + euckr_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c new file mode 100644 index 0000000..6f396e7 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/euc_tw.c @@ -0,0 +1,144 @@ +/********************************************************************** + euc_tw.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + 
* All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static const int EncLen_EUCTW[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +euctw_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCTW[*p]; +} + +static OnigCodePoint +euctw_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); +} + +static int +euctw_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); +} + +static int +euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag, + pp, end, lower); +} + +static int +euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end); +} + +static int +euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); +} + +#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff) + +static UChar* +euctw_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* Assumed in this encoding, + mb-trail bytes don't mix with single bytes. 
+ */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!euctw_islead(*p) && p > start) p--; + len = enc_len(ONIG_ENCODING_EUC_TW, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +euctw_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + if (c <= 0x7e) return TRUE; + else return FALSE; +} + +OnigEncodingType OnigEncodingEUC_TW = { + euctw_mbc_enc_len, + "EUC-TW", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + euctw_mbc_to_code, + onigenc_mb4_code_to_mbclen, + euctw_code_to_mbc, + euctw_mbc_to_normalize, + euctw_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + euctw_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euctw_left_adjust_char_head, + euctw_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c new file mode 100644 index 0000000..01995ea --- /dev/null +++ b/ext/mbstring/oniguruma/enc/gb18030.c @@ -0,0 +1,501 @@ +/********************************************************************** + gb18030.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2005 KUBO Takehiro <kubo AT jiubao DOT org> + * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#if 1 +#define DEBUG_GB18030(arg) +#else +#define DEBUG_GB18030(arg) printf arg +#endif + +enum { + C1, /* one-byte char */ + C2, /* one-byte or second of two-byte char */ + C4, /* one-byte or second or fourth of four-byte char */ + CM /* first of two- or four-byte char or second of two-byte char */ +}; + +static const char GB18030_MAP[] = { + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, + C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 +}; + +static int +gb18030_mbc_enc_len(const UChar* p) +{ + if (GB18030_MAP[*p] != CM) + return 1; + p++; + if (GB18030_MAP[*p] == C4) + return 4; + if (GB18030_MAP[*p] == C1) + return 1; /* illegal sequence */ + return 2; +} + +static OnigCodePoint +gb18030_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end); +} + +static int +gb18030_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf); +} + +static int 
+gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag, + pp, end, lower); +} + +static int +gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); +} + +static int +gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype); +} + +enum state { + S_START, + S_one_C2, + S_one_C4, + S_one_CM, + + S_odd_CM_one_CX, + S_even_CM_one_CX, + + /* CMC4 : pair of "CM C4" */ + S_one_CMC4, + S_odd_CMC4, + S_one_C4_odd_CMC4, + S_even_CMC4, + S_one_C4_even_CMC4, + + S_odd_CM_odd_CMC4, + S_even_CM_odd_CMC4, + + S_odd_CM_even_CMC4, + S_even_CM_even_CMC4, + + /* C4CM : pair of "C4 CM" */ + S_odd_C4CM, + S_one_CM_odd_C4CM, + S_even_C4CM, + S_one_CM_even_C4CM, + + S_even_CM_odd_C4CM, + S_odd_CM_odd_C4CM, + S_even_CM_even_C4CM, + S_odd_CM_even_C4CM, +}; + +static UChar* +gb18030_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + enum state state = S_START; + + DEBUG_GB18030(("----------------\n")); + for (p = s; p >= start; p--) { + DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); + switch (state) { + case S_START: + switch (GB18030_MAP[*p]) { + case C1: + return (UChar *)s; + case C2: + state = S_one_C2; /* C2 */ + break; + case C4: + state = S_one_C4; /* C4 */ + break; + case CM: + state = S_one_CM; /* CM */ + break; + } + break; + case S_one_C2: /* C2 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; /* CM C2 */ + break; + } + break; + case S_one_C4: /* C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CMC4; + break; + } + break; + case S_one_CM: /* CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar 
*)s; + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_odd_CM_one_CX; /* CM CM */ + break; + } + break; + + case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_one_CX; + break; + } + break; + case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; + break; + } + break; + + case S_one_CMC4: /* CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; /* C4 CM C4 */ + break; + case CM: + state = S_even_CM_one_CX; /* CM CM C4 */ + break; + } + break; + case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; + break; + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + case S_one_C4_odd_CMC4: /* C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CMC4; /* CM C4 CM C4 */ + break; + } + break; + case S_even_CMC4: /* CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 3); + case C4: + state = S_one_C4_even_CMC4; + break; + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CMC4; + break; + } + break; + + case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_even_CM_odd_CMC4; + break; + } + break; + case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar 
*)(s - 1); + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + + case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_even_CMC4; + break; + } + break; + case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + + case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CM_odd_C4CM; /* CM C4 CM */ + break; + } + break; + case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 2); /* |CM C4 CM */ + case C4: + state = S_even_C4CM; + break; + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + case S_even_C4CM: /* C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* C4|CM C4 CM */ + case CM: + state = S_one_CM_even_C4CM; + break; + } + break; + case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + + case S_even_CM_odd_C4CM: /* CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|C4|CM */ + case CM: + state = S_odd_CM_odd_C4CM; + break; + } + break; + case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + + case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s 
- 2); /* |CM CM|C4|CM C4 CM */ + case CM: + state = S_odd_CM_even_C4CM; + break; + } + break; + case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + } + } + + DEBUG_GB18030(("state %d\n", state)); + switch (state) { + case S_START: return (UChar *)(s - 0); + case S_one_C2: return (UChar *)(s - 0); + case S_one_C4: return (UChar *)(s - 0); + case S_one_CM: return (UChar *)(s - 0); + + case S_odd_CM_one_CX: return (UChar *)(s - 1); + case S_even_CM_one_CX: return (UChar *)(s - 0); + + case S_one_CMC4: return (UChar *)(s - 1); + case S_odd_CMC4: return (UChar *)(s - 1); + case S_one_C4_odd_CMC4: return (UChar *)(s - 1); + case S_even_CMC4: return (UChar *)(s - 3); + case S_one_C4_even_CMC4: return (UChar *)(s - 3); + + case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); + case S_even_CM_odd_CMC4: return (UChar *)(s - 1); + + case S_odd_CM_even_CMC4: return (UChar *)(s - 1); + case S_even_CM_even_CMC4: return (UChar *)(s - 3); + + case S_odd_C4CM: return (UChar *)(s - 0); + case S_one_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_C4CM: return (UChar *)(s - 2); + case S_one_CM_even_C4CM: return (UChar *)(s - 0); + + case S_even_CM_odd_C4CM: return (UChar *)(s - 0); + case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_CM_even_C4CM: return (UChar *)(s - 2); + case S_odd_CM_even_C4CM: return (UChar *)(s - 0); + } + + return (UChar* )s; /* never come here. (escape warning) */ +} + +static int +gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + return GB18030_MAP[*s] == C1 ? 
TRUE : FALSE; +} + +OnigEncodingType OnigEncodingGB18030 = { + gb18030_mbc_enc_len, + "GB18030", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + gb18030_mbc_to_code, + onigenc_mb4_code_to_mbclen, + gb18030_code_to_mbc, + gb18030_mbc_to_normalize, + gb18030_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + gb18030_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + gb18030_left_adjust_char_head, + gb18030_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c new file mode 100644 index 0000000..5646f26 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_1.c @@ -0,0 +1,151 @@ +/********************************************************************** + iso8859_1.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ + ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) + +static const unsigned short EncISO_8859_1_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 
0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + 
ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_1_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_1_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_1 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-1", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_1_mbc_to_normalize, + iso_8859_1_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + iso_8859_1_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c new file mode 100644 index 0000000..8081ef8 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_10.c @@ -0,0 +1,300 @@ +/********************************************************************** + iso8859_10.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ + ((EncISO_8859_10_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', 
'\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247', + '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_10_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, + 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +iso_8859_10_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_10_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v 
| ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf is lower case letter, but can't convert. */ + if (*p == 0xdf) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_10_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xb1, 0xa1 }, + { 0xb2, 0xa2 }, + { 0xb3, 0xa3 }, + { 0xb4, 0xa4 }, + { 0xb5, 0xa5 }, + { 0xb6, 0xa6 }, + { 0xb8, 0xa8 }, + { 0xb9, 0xa9 }, + { 0xba, 0xaa }, + { 0xbb, 0xab }, + { 0xbc, 0xac }, + { 0xbe, 0xae }, + { 0xbf, 0xaf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, 
/*
 * Encoding descriptor for ISO-8859-10.
 *
 * A single-byte encoding (min and max encoded length are both 1) that
 * enables both the ASCII and the non-ASCII case-ambiguity classes.
 * Byte/code-point conversion, newline detection, char-head adjustment and
 * reverse-match checks use the shared onigenc_* helpers; normalization,
 * ambiguity testing, pair-ambiguity codes and ctype lookup use the
 * iso_8859_10_* functions from this file.
 *
 * NOTE(review): the meaning of each positional slot is fixed by the
 * OnigEncodingType declaration, which is not visible here -- confirm the
 * order against that declaration before reordering or inserting entries.
 */
OnigEncodingType OnigEncodingISO_8859_10 = {
  onigenc_single_byte_mbc_enc_len,
  "ISO-8859-10",  /* name */
  1,              /* max enc length */
  1,              /* min enc length */
  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
  {
      /* only the escape character is set; the rest are marked ineffective */
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  },
  onigenc_is_mbc_newline_0x0a,
  onigenc_single_byte_mbc_to_code,
  onigenc_single_byte_code_to_mbclen,
  onigenc_single_byte_code_to_mbc,
  iso_8859_10_mbc_to_normalize,
  iso_8859_10_is_mbc_ambiguous,
  iso_8859_10_get_all_pair_ambig_codes,
  onigenc_ess_tsett_get_all_comp_ambig_codes,
  iso_8859_10_is_code_ctype,
  onigenc_not_support_get_ctype_code_range,
  onigenc_single_byte_left_adjust_char_head,
  onigenc_always_true_is_allowed_reverse_match
};
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ + ((EncISO_8859_11_CtypeTable[code] & ctype) != 0) + +static const unsigned short EncISO_8859_11_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 
0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_11_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_11 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-11", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_to_normalize, + onigenc_ascii_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + iso_8859_11_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c new file mode 100644 index 0000000..69316ed --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_13.c @@ -0,0 +1,268 @@ +/********************************************************************** + iso8859_13.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 
K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ + ((EncISO_8859_13_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', 
'\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_13_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0, + 0x10e2, 0x10a0, 0x10e2, 
0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0 +}; + +static int +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_13_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf, 0xb5 are lower case letter, but can't convert. */ + if (*p == 0xdf || *p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_13_CTYPE(code, ctype); + else + return FALSE; +} + +static int +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_13 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-13", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint 
)ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c new file mode 100644 index 0000000..44638cf --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_14.c @@ -0,0 +1,298 @@ +/********************************************************************** + iso8859_14.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ + ((EncISO_8859_14_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', 
'\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247', + '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377', + '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271', + '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_14_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 
0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0, + 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2, + 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2, + 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & 
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_14_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); /* v = UPPER/LOWER bits only; the LOWER test must mask with '&' -- OR-ing a flag in is never zero */ + + if ((v & ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf is lower case letter, but can't convert. */ + if (*p == 0xdf) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_14_CTYPE(code, ctype); + else + return FALSE; +} + +static int +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa4, 0xa5 }, + { 0xa5, 0xa4 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xab, 0xa6 }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, + + { 0xb0, 0xb1 }, + { 0xb1, 0xb0 }, + { 0xb2, 0xb3 }, + { 0xb3, 0xb2 }, + { 0xb4, 0xb5 }, + { 0xb5, 0xb4 }, + { 0xb7, 0xb9 }, + { 0xb8, 0xa8 }, + { 0xb9, 0xb7 }, + { 0xba, 0xaa }, + { 0xbb, 0xbf }, + { 0xbc, 0xac }, + { 0xbd, 0xbe }, + { 0xbe, 0xbd }, + { 0xbf, 0xbb }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb 
}, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xaf } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_14 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-14", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c new file mode 100644 index 0000000..f643b89 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_15.c @@ -0,0 +1,279 @@ +/********************************************************************** + iso8859_15.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ + ((EncISO_8859_15_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', 
'\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_15_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, + 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0, + 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_15_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); /* v = UPPER/LOWER bits only; the LOWER test must mask with '&' -- OR-ing a flag in is never zero */ + + if ((v & ONIGENC_CTYPE_LOWER) != 
0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_15_CTYPE(code, ctype); + else + return FALSE; +} + +static int +get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa6, 0xa8 }, + { 0xa8, 0xa6 }, + + { 0xb4, 0xb8 }, + { 0xb8, 0xb4 }, + { 0xbc, 0xbd }, + { 0xbd, 0xbc }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xbe } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + 
else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_15 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-15", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c new file mode 100644 index 0000000..921ae36 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_16.c @@ -0,0 +1,292 @@ +/********************************************************************** + iso8859_16.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ + ((EncISO_8859_16_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + 
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\263', '\245', '\245', '\250', '\247', + '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', + '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_16_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 
0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0, + 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2, + 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0, + 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte 
length of converted char to lower */ +} + +static int +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_16_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); /* v = UPPER/LOWER bits only; the LOWER test must mask with '&' -- OR-ing a flag in is never zero */ + + if ((v & ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf is lower case letter, but can't convert. */ + if (*p == 0xdf) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_16_CTYPE(code, ctype); + else + return FALSE; +} + +static int +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xa8, 0xa6 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xae, 0xac }, + { 0xaf, 0xbf }, + + { 0xb2, 0xb9 }, + { 0xb3, 0xa3 }, + { 0xb4, 0xb8 }, + { 0xb8, 0xb4 }, + { 0xb9, 0xb2 }, + { 0xba, 0xaa }, + { 0xbc, 0xbd }, + { 0xbd, 0xbc }, + { 0xbe, 0xff }, + { 0xbf, 0xaf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 
/*
 * Encoding descriptor for ISO-8859-16.
 *
 * Describes a single-byte encoding: minimum and maximum encoded length are
 * both 1.  Both the ASCII and the non-ASCII ambiguity flags are set.  Only
 * the escape meta character ('\\') is assigned; every other meta character
 * slot holds ONIG_INEFFECTIVE_META_CHAR.
 *
 * The onigenc_* entries are defined outside this file (presumably declared
 * via regenc.h -- confirm); the normalize, ambiguity, pair-table and ctype
 * entries are this file's static functions.
 */
OnigEncodingType OnigEncodingISO_8859_16 = {
  onigenc_single_byte_mbc_enc_len,
  "ISO-8859-16",  /* name */
  1,              /* max enc length */
  1,              /* min enc length */
  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  },
  onigenc_is_mbc_newline_0x0a,
  onigenc_single_byte_mbc_to_code,
  onigenc_single_byte_code_to_mbclen,
  onigenc_single_byte_code_to_mbc,
  mbc_to_normalize,
  is_mbc_ambiguous,
  get_all_pair_ambig_codes,
  onigenc_ess_tsett_get_all_comp_ambig_codes,
  is_code_ctype,
  onigenc_not_support_get_ctype_code_range,
  onigenc_single_byte_left_adjust_char_head,
  onigenc_always_true_is_allowed_reverse_match
};
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ + ((EncISO_8859_2_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', 
'\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_2_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, + 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, + 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, + 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 +}; + +static int +iso_8859_2_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_2_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | 
ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf is lower case letter, but can't convert. */ + if (*p == 0xdf) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xb1, 0xa1 }, + { 0xb3, 0xa3 }, + { 0xb5, 0xa5 }, + { 0xb6, 0xa6 }, + { 0xb9, 0xa9 }, + { 0xba, 0xaa }, + { 0xbb, 0xab }, + { 0xbc, 0xac }, + { 0xbe, 0xae }, + { 0xbf, 0xaf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) 
/ sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +static int +iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_2_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_2 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-2", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_2_mbc_to_normalize, + iso_8859_2_is_mbc_ambiguous, + iso_8859_2_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + iso_8859_2_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c new file mode 100644 index 0000000..e62d20d --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_3.c @@ -0,0 +1,281 @@ +/********************************************************************** + iso8859_3.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ + ((EncISO_8859_3_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\320', '\361', 
'\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_3_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0, + 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2, + 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0, + 0x00a0, 0x10e2, 0x10e2, 0x10e2, 
0x10e2, 0x11a0, 0x0000, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 +}; + +static int +iso_8859_3_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_3_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xdf || *p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_3_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xb1 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xaf, 0xbf }, + { 0xb1, 0xa1 }, + { 0xb6, 0xa6 }, + { 0xb9, 0xa9 }, + { 0xba, 0xaa }, + { 0xbb, 0xab }, + { 0xbc, 0xac }, + { 0xbf, 0xaf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_3 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-3", /* name */ + 1, 
/* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_3_mbc_to_normalize, + iso_8859_3_is_mbc_ambiguous, + iso_8859_3_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + iso_8859_3_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c new file mode 100644 index 0000000..dd6bd7d --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_4.c @@ -0,0 +1,290 @@ +/********************************************************************** + iso8859_4.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ + ((EncISO_8859_4_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', 
'\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_4_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 
0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, + 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0, + 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, + 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 +}; + +static int +iso_8859_4_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & 
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_4_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xdf || *p == 0xa2) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_4_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + + { 0xb1, 0xa1 }, + { 0xb3, 0xa3 }, + { 0xb5, 0xa5 }, + { 0xb6, 0xa6 }, + { 0xb9, 0xa9 }, + { 0xba, 0xaa }, + { 0xbb, 0xab }, + { 0xbc, 0xac }, + { 0xbe, 0xae }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 
/*
 * Encoding descriptor for ISO-8859-4.
 *
 * Describes a single-byte encoding: minimum and maximum encoded length are
 * both 1.  Both the ASCII and the non-ASCII ambiguity flags are set.  Only
 * the escape meta character ('\\') is assigned; every other meta character
 * slot holds ONIG_INEFFECTIVE_META_CHAR.
 *
 * The onigenc_* entries are defined outside this file (presumably declared
 * via regenc.h -- confirm); the iso_8859_4_* entries are this file's static
 * functions.
 */
OnigEncodingType OnigEncodingISO_8859_4 = {
  onigenc_single_byte_mbc_enc_len,
  "ISO-8859-4",  /* name */
  1,             /* max enc length */
  1,             /* min enc length */
  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  },
  onigenc_is_mbc_newline_0x0a,
  onigenc_single_byte_mbc_to_code,
  onigenc_single_byte_code_to_mbclen,
  onigenc_single_byte_code_to_mbc,
  iso_8859_4_mbc_to_normalize,
  iso_8859_4_is_mbc_ambiguous,
  iso_8859_4_get_all_pair_ambig_codes,
  onigenc_ess_tsett_get_all_comp_ambig_codes,
  iso_8859_4_is_code_ctype,
  onigenc_not_support_get_ctype_code_range,
  onigenc_single_byte_left_adjust_char_head,
  onigenc_always_true_is_allowed_reverse_match
};
+/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ + ((EncISO_8859_5_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\320', '\321', 
'\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_5_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 
0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2 +}; + +static int +iso_8859_5_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_5_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_5_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xf1 }, + { 0xa2, 0xf2 }, + { 0xa3, 0xf3 }, + { 0xa4, 0xf4 }, + { 0xa5, 0xf5 }, + { 0xa6, 0xf6 }, + { 0xa7, 0xf7 }, + { 0xa8, 0xf8 }, + { 0xa9, 0xf9 }, + { 0xaa, 0xfa }, + { 0xab, 0xfb }, + { 0xac, 0xfc }, + { 0xae, 0xfe }, + { 0xaf, 0xff }, + + { 0xb0, 0xd0 }, + { 0xb1, 0xd1 }, + { 0xb2, 0xd2 }, + { 0xb3, 0xd3 }, + { 0xb4, 0xd4 }, + { 0xb5, 0xd5 }, + { 0xb6, 0xd6 }, + { 0xb7, 0xd7 }, + { 0xb8, 0xd8 }, + { 0xb9, 0xd9 }, + { 0xba, 0xda }, + { 0xbb, 0xdb }, + { 0xbc, 0xdc }, + { 0xbd, 0xdd }, + { 0xbe, 0xdf }, + { 0xbf, 0xdf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xb0 }, + { 0xd1, 0xb1 }, + { 0xd2, 0xb2 }, + { 0xd3, 0xb3 }, + { 0xd4, 0xb4 }, + { 0xd5, 0xb5 }, + { 0xd6, 0xb6 }, + { 0xd7, 0xb7 }, + { 0xd8, 0xb8 }, + { 0xd9, 0xb9 }, + { 0xda, 0xba }, + { 0xdb, 0xbb }, + { 0xdc, 0xbc }, + { 0xdd, 0xbd }, + { 0xde, 0xbe }, + { 0xdf, 0xbf }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf1, 0xa1 }, + { 0xf2, 0xa2 }, + { 0xf3, 0xa3 }, + { 0xf4, 0xa4 }, + { 0xf5, 0xa5 }, + { 0xf6, 0xa6 }, + { 0xf7, 0xa7 }, + { 0xf8, 0xa8 }, + { 0xf9, 0xa9 }, + { 0xfa, 0xaa }, + { 0xfb, 0xab }, + { 0xfc, 0xac }, + { 
0xfe, 0xae }, + { 0xff, 0xaf } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_5 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-5", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_5_mbc_to_normalize, + iso_8859_5_is_mbc_ambiguous, + iso_8859_5_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + iso_8859_5_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c new file mode 100644 index 0000000..fffcd0e --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_6.c @@ -0,0 +1,105 @@ +/********************************************************************** + iso8859_6.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ + ((EncISO_8859_6_CtypeTable[code] & ctype) != 0) + +static const unsigned short EncISO_8859_6_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_6_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_6 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-6", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_to_normalize, + onigenc_ascii_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + iso_8859_6_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c new file mode 100644 index 0000000..e87661d --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_7.c @@ -0,0 +1,278 @@ +/********************************************************************** + iso8859_7.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako 
<sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ + ((EncISO_8859_7_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', + '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', + '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', 
'\322', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_7_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x01a0, 
0x14a2, 0x10a0, 0x14a2, 0x14a2, + 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000 +}; + +static int +iso_8859_7_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_7_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { + if (*p == 0xc0 || *p == 0xe0) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_7_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xb6, 0xdc }, + { 0xb8, 0xdd }, + { 0xb9, 0xde }, + { 0xba, 0xdf }, + { 0xbc, 0xfc }, + { 0xbe, 0xfd }, + { 0xbf, 0xfe }, + + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xb6 }, + { 0xdd, 0xb8 }, + { 0xde, 0xb9 }, + { 0xdf, 0xba }, + + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xbc }, + { 0xfd, 0xbe }, + { 0xfe, 0xbf } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_7 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-7", /* name */ + 1, /* max enc length */ + 1, /* min enc 
length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_7_mbc_to_normalize, + iso_8859_7_is_mbc_ambiguous, + iso_8859_7_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + iso_8859_7_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c new file mode 100644 index 0000000..e76966c --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_8.c @@ -0,0 +1,105 @@ +/********************************************************************** + iso8859_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ + ((EncISO_8859_8_CtypeTable[code] & ctype) != 0) + +static const unsigned short EncISO_8859_8_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, + 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_8_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_8 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_to_normalize, + onigenc_ascii_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + iso_8859_8_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c new file mode 100644 index 0000000..16a30c5 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/iso8859_9.c @@ -0,0 +1,270 @@ +/********************************************************************** + iso8859_9.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ + ((EncISO_8859_9_CtypeTable[code] & ctype) != 0) + +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', 
'\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_9_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static int +iso_8859_9_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncISO_8859_9_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | 
ONIGENC_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + +static int +iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_9_CTYPE(code, ctype); + else + return FALSE; +} + +static int +iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingISO_8859_9 = { + onigenc_single_byte_mbc_enc_len, + 
"ISO-8859-9", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + iso_8859_9_mbc_to_normalize, + iso_8859_9_is_mbc_ambiguous, + iso_8859_9_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + iso_8859_9_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c new file mode 100644 index 0000000..d7277e8 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/koi8.c @@ -0,0 +1,264 @@ +/********************************************************************** + koi8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c] +#define ENC_IS_KOI8_CTYPE(code,ctype) \ + ((EncKOI8_CtypeTable[code] & ctype) != 0) + +static const UChar EncKOI8_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', 
'\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 +}; + +static int +koi8_mbc_to_normalize(OnigAmbigType flag, + const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower) +{ + const OnigUChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_KOI8_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ 
+} + +static int +koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) +{ + const OnigUChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncKOI8_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + return (v != 0 ? TRUE : FALSE); + } + return FALSE; +} + + +static int +koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_CTYPE(code, ctype); + else + return FALSE; +} + +static int +koi8_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfe, 0xde }, + { 0xff, 0xdf } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = 
OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingKOI8 = { + onigenc_single_byte_mbc_enc_len, + "KOI8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_mbc_to_normalize, + koi8_is_mbc_ambiguous, + koi8_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + koi8_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c new file mode 100644 index 0000000..1010f5f --- /dev/null +++ b/ext/mbstring/oniguruma/enc/koi8_r.c @@ -0,0 +1,266 @@ +/********************************************************************** + koi8_r.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] +#define ENC_IS_KOI8_R_CTYPE(code,ctype) \ + ((EncKOI8_R_CtypeTable[code] & ctype) != 0) + +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', 
'\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_R_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 
0x00a0, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 +}; + +static int +koi8_r_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +static int +koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncKOI8_R_CtypeTable[*p] & + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} + +static int +koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_R_CTYPE(code, ctype); + else + return FALSE; +} + +static int +koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xa3, 0xb3 }, + { 0xb3, 0xa3 }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfe, 0xde }, + { 0xff, 0xdf } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return 52; + } + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +OnigEncodingType OnigEncodingKOI8_R = { + onigenc_single_byte_mbc_enc_len, + "KOI8-R", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + 
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_r_mbc_to_normalize, + koi8_r_is_mbc_ambiguous, + koi8_r_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + koi8_r_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c new file mode 100644 index 0000000..fcf0574 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/mktable.c @@ -0,0 +1,1115 @@ +/********************************************************************** + mktable.c +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <stdio.h> + +#define NOT_RUBY +#include "regenc.h" + +#define UNICODE_ISO_8859_1 0 +#define ISO_8859_1 1 +#define ISO_8859_2 2 +#define ISO_8859_3 3 +#define ISO_8859_4 4 +#define ISO_8859_5 5 +#define ISO_8859_6 6 +#define ISO_8859_7 7 +#define ISO_8859_8 8 +#define ISO_8859_9 9 +#define ISO_8859_10 10 +#define ISO_8859_11 11 +#define ISO_8859_13 12 +#define ISO_8859_14 13 +#define ISO_8859_15 14 +#define ISO_8859_16 15 +#define KOI8 16 +#define KOI8_R 17 + +typedef struct { + int num; + char* name; +} ENC_INFO; + +static ENC_INFO Info[] = { + { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" }, + { ISO_8859_1, "ISO_8859_1" }, + { ISO_8859_2, "ISO_8859_2" }, + { ISO_8859_3, "ISO_8859_3" }, + { ISO_8859_4, "ISO_8859_4" }, + { ISO_8859_5, "ISO_8859_5" }, + { ISO_8859_6, "ISO_8859_6" }, + { ISO_8859_7, "ISO_8859_7" }, + { ISO_8859_8, "ISO_8859_8" }, + { ISO_8859_9, "ISO_8859_9" }, + { ISO_8859_10, "ISO_8859_10" }, + { ISO_8859_11, "ISO_8859_11" }, + { ISO_8859_13, "ISO_8859_13" }, + { ISO_8859_14, "ISO_8859_14" }, + { ISO_8859_15, "ISO_8859_15" }, + { ISO_8859_16, "ISO_8859_16" }, + { KOI8, "KOI8" }, + { KOI8_R, "KOI8_R" } +}; + + +static int IsAlpha(int enc, int c) 
+{ + if (c >= 0x41 && c <= 0x5a) return 1; + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c == 0xb1) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c >= 0xa1 && c <= 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: 
+ if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c == 0xc0) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb5) return 1; + if (c >= 0xe0 && c <= 0xfa) return 1; + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xa6) return 1; + if (c >= 0xa8 && c <= 0xac) return 1; + if (c == 0xae || c == 0xaf) return 1; + if (c >= 0xb1 && c <= 0xb6) return 1; + if (c >= 0xb8 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xff) return 1; + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c == 0xb5) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c >= 0xbf && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c == 0xa1 || c == 0xa2) return 1; + if (c == 0xa4 || c == 0xa5) return 1; + if (c == 0xa6 || c == 0xa8) return 1; + if (c >= 0xaa && c <= 0xac) return 1; + if (c >= 0xaf && c <= 0xb5) return 1; + if (c >= 0xb7 && c <= 0xff) return 1; + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xb4) return 1; + if (c == 0xb8) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa2) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 
0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb3) return 1; + if (c == 0xb4) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0xa3 || c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsBlank(int enc, int c) +{ + if (c == 0x09 || c == 0x20) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c == 0xa0) return 1; + break; + + case KOI8_R: + if (c == 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsCntrl(int enc, int c) +{ + if (c >= 0x00 && c <= 0x1F) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c == 0xad) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c >= 0x7f && c <= 0x9F) return 1; + break; + + + case KOI8_R: + if (c == 0x7f) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsDigit(int enc, int c) +{ + if (c >= 0x30 && c <= 0x39) return 1; + return 0; +} + +static int IsGraph(int enc, int c) +{ + if (c >= 0x21 && c <= 0x7e) return 1; + + switch 
(enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + if (c >= 0xa1 && c <= 0xff) return 1; + break; + + case ISO_8859_3: + if (c >= 0xa1) { + if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || + c == 0xe3 || c == 0xf0) + return 0; + else + return 1; + } + break; + + case ISO_8859_6: + if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) + return 1; + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c >= 0xa1 && c <= 0xfe && + c != 0xa4 && c != 0xa5 && c != 0xaa && + c != 0xae && c != 0xd2) return 1; + break; + + case ISO_8859_8: + if (c >= 0xa2 && c <= 0xfa) { + if (c >= 0xbf && c <= 0xde) return 0; + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsLower(int enc, int c) +{ + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xb1) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c == 0xbf) return 1; + if (c == 0xdf) return 1; + if 
(c >= 0xe0 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c == 0xa2) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c == 0xdf) return 1; + if (c >= 0xe0 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + break; + + case ISO_8859_7: + if (c == 0xc0) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb5) return 1; + break; + + case ISO_8859_10: + if (c >= 0xb1 && c <= 0xb6) return 1; + if (c >= 0xb8 && c <= 0xbc) return 1; + if (c == 0xbe || c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case ISO_8859_11: + break; + + case ISO_8859_13: + if (c == 0xb5) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c == 0xa2) return 1; + if (c == 0xa5) return 1; + if (c == 0xab) return 1; + if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe || c == 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c == 0xb5) return 1; + if (c == 0xba) return 1; + if (c >= 0xdf && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa8) return 1; + if (c == 0xb8) return 1; + if (c == 0xbd) return 1; + break; + + case ISO_8859_16: + if (c == 0xa2) return 1; + if (c == 0xa8) return 1; + if (c == 0xae) return 1; + if (c == 0xb3) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbd) return 1; + if (c 
== 0xbf) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0xa3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xdf) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsPrint(int enc, int c) +{ + if (c >= 0x20 && c <= 0x7e) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c >= 0x09 && c <= 0x0d) return 1; + if (c == 0x85) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + if (c >= 0xa0 && c <= 0xff) return 1; + break; + + case ISO_8859_3: + if (c >= 0xa0) { + if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || + c == 0xe3 || c == 0xf0) + return 0; + else + return 1; + } + break; + + case ISO_8859_6: + if (c == 0xa0) return 1; + if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) + return 1; + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c >= 0xa0 && c <= 0xfe && + c != 0xa4 && c != 0xa5 && c != 0xaa && + c != 0xae && c != 0xd2) return 1; + break; + + case ISO_8859_8: + if (c >= 0xa0 && c <= 0xfa) { + if (c >= 0xbf && c <= 0xde) return 0; + if (c == 0xa1) return 0; + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa0 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case KOI8: + if (c == 0xa0) return 1; + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c >= 0x80 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsPunct(int enc, int c) +{ + if (enc == UNICODE_ISO_8859_1) { + if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || + c == 0x7c || c == 0x7e) return 1; + if (c >= 0x3c && c <= 0x3e) return 1; + } + + if (c >= 0x21 && c <= 0x2f) return 1; + if (c >= 0x3a && c <= 0x40) return 1; + if (c >= 0x5b && c 
<= 0x60) return 1; + if (c >= 0x7b && c <= 0x7e) return 1; + + switch (enc) { + case ISO_8859_1: + case ISO_8859_9: + case ISO_8859_15: + if (c == 0xad) return 1; + /* fall */ + case UNICODE_ISO_8859_1: + if (c == 0xa1) return 1; + if (c == 0xab) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_14: + if (c == 0xad) return 1; + break; + + case ISO_8859_3: + case ISO_8859_10: + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbd) return 1; + break; + + case ISO_8859_6: + if (c == 0xac) return 1; + if (c == 0xad) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_7: + if (c == 0xa1 || c == 0xa2) return 1; + if (c == 0xab) return 1; + if (c == 0xaf) return 1; + if (c == 0xad) return 1; + if (c == 0xb7 || c == 0xbb) return 1; + break; + + case ISO_8859_8: + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xdf) return 1; + break; + + case ISO_8859_13: + if (c == 0xa1 || c == 0xa5) return 1; + if (c == 0xab || c == 0xad) return 1; + if (c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xff) return 1; + break; + + case ISO_8859_16: + if (c == 0xa5) return 1; + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb5) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + break; + + case KOI8_R: + if (c == 0x9e) return 1; + break; + + case ISO_8859_11: + case KOI8: + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsSpace(int enc, int c) +{ + if (c >= 0x09 && c <= 0x0d) return 1; + if (c == 0x20) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c == 0x85) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case 
ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c == 0xa0) return 1; + break; + + case KOI8_R: + if (c == 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsUpper(int enc, int c) +{ + if (c >= 0x41 && c <= 0x5a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_4: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + break; + + case ISO_8859_6: + break; + + case ISO_8859_7: + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + break; + + case ISO_8859_8: + case ISO_8859_11: + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xa6) return 1; + if (c >= 0xa8 && c <= 0xac) return 1; + if (c == 0xae || c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; 
+ if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_14: + if (c == 0xa1) return 1; + if (c == 0xa4 || c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa || c == 0xac) return 1; + if (c == 0xaf || c == 0xb0) return 1; + if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb || c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_15: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + if (c == 0xa6) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case KOI8_R: + if (c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xe0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsXDigit(int enc, int c) +{ + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x46) return 1; + if (c >= 0x61 && c <= 0x66) return 1; + return 0; +} + +static int IsWord(int enc, int c) +{ + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x5a) return 1; + if (c == 0x5f) return 1; + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c 
== 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xb1 && c <= 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbd) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c >= 0xa1 && c <= 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xea) return 1; + if (c >= 0xeb && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c == 0xb2 || c == 0xb3) return 1; + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbf) return 1; + if (c == 0xc0) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + if (c >= 0xdc && c <= 0xfe) 
return 1; + break; + + case ISO_8859_8: + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xe0 && c <= 0xfa) return 1; + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xff) { + if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd) + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c >= 0xbf && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c >= 0xa1 && c <= 0xff) { + if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae || + c == 0xb6) return 0; + return 1; + } + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xb4) return 1; + if (c == 0xb8) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa2) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb3) return 1; + if (c == 0xb4) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + if (c >= 
0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0x9d) return 1; + if (c == 0xa3 || c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsAscii(int enc, int c) +{ + if (c >= 0x00 && c <= 0x7f) return 1; + return 0; +} + +static int IsNewline(int enc, int c) +{ + if (c == 0x0a) return 1; + return 0; +} + +static int exec(FILE* fp, ENC_INFO* einfo) +{ +#define NCOL 8 + + int c, val, enc; + + enc = einfo->num; + + fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n", + einfo->name); + + for (c = 0; c < 256; c++) { + val = 0; + if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE; + if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA; + if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK; + if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL; + if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT; + if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH; + if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER; + if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT; + if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT; + if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE; + if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER; + if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT; + if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD; + if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII; + + if (c % NCOL == 0) fputs(" ", fp); + fprintf(fp, "0x%04x", val); + if (c != 255) fputs(",", fp); + if (c != 0 && c % NCOL == (NCOL-1)) + fputs("\n", fp); + else + fputs(" ", fp); + } + fprintf(fp, "};\n"); + return 0; +} + +extern int main(int argc, char* argv[]) +{ + int i; + FILE* fp = stdout; + + for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) { + exec(fp, &Info[i]); + } +} diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c new file mode 100644 index 0000000..f7d7d52 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/sjis.c @@ -0,0 +1,238 @@ 
+/********************************************************************** + sjis.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static const int EncLen_SJIS[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; + +#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) +#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] + +static int +sjis_mbc_enc_len(const UChar* p) +{ + return EncLen_SJIS[*p]; +} + +static int +sjis_code_to_mbclen(OnigCodePoint code) +{ + if (code < 
256) { + if (EncLen_SJIS[(int )code] == 1) + return 1; + else + return 0; + } + else if (code <= 0xffff) { + return 2; + } + else + return 0; +} + +static OnigCodePoint +sjis_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_SJIS, p); + c = *p++; + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +sjis_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 0 + if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +static int +sjis_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp)++; + return 1; + } + else { + int len = enc_len(ONIG_ENCODING_SJIS, p); + + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); + +} + +static int +sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { + return (sjis_code_to_mbclen(code) > 1 ? 
TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +sjis_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (SJIS_ISMB_TRAIL(*p)) { + while (p > start) { + if (! SJIS_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enc_len(ONIG_ENCODING_SJIS, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +sjis_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); +} + +OnigEncodingType OnigEncodingSJIS = { + sjis_mbc_enc_len, + "Shift_JIS", /* name */ + 2, /* max byte length */ + 1, /* min byte length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + sjis_mbc_to_code, + sjis_code_to_mbclen, + sjis_code_to_mbc, + sjis_mbc_to_normalize, + sjis_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + sjis_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + sjis_left_adjust_char_head, + sjis_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c new file mode 100644 index 0000000..a8cf539 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/unicode.c @@ -0,0 +1,3403 @@ +/********************************************************************** + unicode.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + + +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 
0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; + +static const OnigCodePoint CRAlnum[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 414, +#else + 9, +#endif + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, 
+ 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 
0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x1371, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 
0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 
0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRAlnum */ + +static const OnigCodePoint CRAlpha[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 396, +#else + 8, +#endif + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 
0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 
0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 
0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRAlpha 
*/ + +static const OnigCodePoint CRBlank[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 9, +#else + 3, +#endif + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRBlank */ + +static const OnigCodePoint CRCntrl[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 19, +#else + 3, +#endif + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRCntrl */ + +static const OnigCodePoint CRDigit[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 23, +#else + 1, +#endif + 0x0030, 0x0039 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be7, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1369, 0x1371, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRDigit */ + +static const OnigCodePoint CRGraph[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 405, +#else + 2, +#endif + 0x0021, 0x007e, + 0x00a1, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 
0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 
0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 
0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x2054, + 0x2057, 0x2057, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 
0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRGraph */ + +static const OnigCodePoint CRLower[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 424, +#else + 6, +#endif + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, 
+ 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0236, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 
0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d6b, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 
0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 
0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213d, 0x213d, + 0x2146, 0x2149, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a3, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRLower */ + +static const OnigCodePoint CRPrint[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 405, +#else + 4, +#endif + 0x0009, 0x000d, + 0x0020, 0x007e, + 0x0085, 0x0085, + 0x00a0, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 
0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 
0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 
0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2054, + 0x2057, 0x2057, + 0x205f, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 
0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRPrint */ + +static const OnigCodePoint CRPunct[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 86, +#else + 14, +#endif + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x2054, + 0x2057, 0x2057, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x3001, 
0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRPunct */ + +static const OnigCodePoint CRSpace[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 11, +#else + 4, +#endif + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRSpace */ + +static const OnigCodePoint CRUpper[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 421, +#else + 3, +#endif + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 
0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x0400, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 
0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 
0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 
0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRUpper */ + +static const OnigCodePoint CRXDigit[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 3, +#else + 3, +#endif + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; + +static const OnigCodePoint CRASCII[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 1, +#else + 1, +#endif + 0x0000, 0x007f +}; + +static const OnigCodePoint CRWord[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 436, +#else + 12, +#endif + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, +#ifndef USE_UNICODE_FULL_RANGE_CTYPE + 0x00f8, 0x7fffffff +#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ + 0x00f8, 0x0236, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 
0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 
0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 
0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 
0xff65, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of CRWord */ + + /* Tests whether code point `code` belongs to character class `ctype`. Code points below 256 are answered by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. With USE_UNICODE_FULL_RANGE_CTYPE each class is looked up in its CR* range table through onig_is_in_code_range; XDIGIT, ASCII and NEWLINE return FALSE for code >= 256 and an unrecognized ctype returns ONIGENCERR_TYPE_BUG. Without that macro, code >= 256 yields TRUE only when ctype has the ONIGENC_CTYPE_WORD bit set. */ +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + return onig_is_in_code_range((UChar* )CRAlpha, code); + break; + case ONIGENC_CTYPE_BLANK: + return onig_is_in_code_range((UChar* )CRBlank, code); + break; + case ONIGENC_CTYPE_CNTRL: + return onig_is_in_code_range((UChar* )CRCntrl, code); + break; + case ONIGENC_CTYPE_DIGIT: + return onig_is_in_code_range((UChar* )CRDigit, code); + break; + case ONIGENC_CTYPE_GRAPH: + return
onig_is_in_code_range((UChar* )CRGraph, code); + break; + case ONIGENC_CTYPE_LOWER: + return onig_is_in_code_range((UChar* )CRLower, code); + break; + case ONIGENC_CTYPE_PRINT: + return onig_is_in_code_range((UChar* )CRPrint, code); + break; + case ONIGENC_CTYPE_PUNCT: + return onig_is_in_code_range((UChar* )CRPunct, code); + break; + case ONIGENC_CTYPE_SPACE: + return onig_is_in_code_range((UChar* )CRSpace, code); + break; + case ONIGENC_CTYPE_UPPER: + return onig_is_in_code_range((UChar* )CRUpper, code); + break; + case ONIGENC_CTYPE_XDIGIT: + return FALSE; + break; + case ONIGENC_CTYPE_WORD: + return onig_is_in_code_range((UChar* )CRWord, code); + break; + case ONIGENC_CTYPE_ASCII: + return FALSE; + break; + case ONIGENC_CTYPE_ALNUM: + return onig_is_in_code_range((UChar* )CRAlnum, code); + break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + +#else + + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + return TRUE; + } + return FALSE; +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +} + /* Hands back the code-range table for character class `ctype`. *sbr is always pointed at a function-local table holding the single value 0; *mbr is pointed at the CR* table selected by ctype. Returns 0 on success, or ONIGENCERR_TYPE_BUG (with *mbr left untouched) when ctype matches no case. */ +extern int +onigenc_unicode_get_ctype_code_range(int ctype, + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) +{ + static const OnigCodePoint EmptyRange[] = { 0 }; + +#define CR_SET(list) do { \ + *mbr = list; \ +} while (0) + + *sbr = EmptyRange; + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + CR_SET(CRAlpha); + break; + case ONIGENC_CTYPE_BLANK: + CR_SET(CRBlank); + break; + case ONIGENC_CTYPE_CNTRL: + CR_SET(CRCntrl); + break; + case ONIGENC_CTYPE_DIGIT: + CR_SET(CRDigit); + break; + case ONIGENC_CTYPE_GRAPH: + CR_SET(CRGraph); + break; + case ONIGENC_CTYPE_LOWER: + CR_SET(CRLower); + break; + case ONIGENC_CTYPE_PRINT: + CR_SET(CRPrint); + break; + case ONIGENC_CTYPE_PUNCT: + CR_SET(CRPunct); + break; + case ONIGENC_CTYPE_SPACE: + CR_SET(CRSpace); + break; + case ONIGENC_CTYPE_UPPER: + CR_SET(CRUpper); + break; + case ONIGENC_CTYPE_XDIGIT: + CR_SET(CRXDigit); + break; + case ONIGENC_CTYPE_WORD: +
CR_SET(CRWord); + break; + case ONIGENC_CTYPE_ASCII: + CR_SET(CRASCII); + break; + case ONIGENC_CTYPE_ALNUM: + CR_SET(CRAlnum); + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + + return 0; +} diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c new file mode 100644 index 0000000..6ab80a6 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -0,0 +1,232 @@ +/********************************************************************** + utf16_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) +#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) + /* Byte length of a UTF-16BE character, indexed by its first byte: 4 for 0xd8-0xdb (the bytes UTF16_IS_SURROGATE_FIRST accepts), 2 for every other value. */ +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + /* Returns the byte length (2 or 4) of the character whose first byte is *p, read from EncLen_UTF16. */ +static int +utf16be_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF16[*p]; +} + /* Returns 1 when both bytes at p lie before `end` and are 0x00 0x0a, otherwise 0. With USE_UNICODE_ALL_LINE_TERMINATORS the pairs 0x00 0x0d, 0x00 0x85, 0x20 0x28 and 0x20 0x29 are accepted as well. */ +static int +utf16be_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 1 < end) { + if (*(p+1) == 0x0a && *p == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00) + return 1; + if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint
+utf16be_mbc_to_code(const UChar* p, const UChar* end) +{ + OnigCodePoint code; + + if (UTF16_IS_SURROGATE_FIRST(*p)) { + code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16) + + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8) + + p[3]; + } + else { + code = p[0] * 256 + p[1]; + } + return code; +} + +static int +utf16be_code_to_mbclen(OnigCodePoint code) +{ + return (code > 0xffff ? 4 : 2); +} + +static int +utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) /* writes code to buf as UTF-16BE; returns the number of bytes written (2 or 4) */ +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int plane, high; + + plane = (code >> 16) - 1; /* a surrogate pair stores (plane - 1); utf16be_mbc_to_code adds the 1 back */ + *p++ = (plane >> 2) + 0xd8; + high = (code & 0xff00) >> 8; + *p++ = ((plane & 0x03) << 6) + (high >> 2); + *p++ = (high & 0x03) + 0xdc; /* keep both low bits of the high byte; a 0x02 mask dropped bit 0 */ + *p = (UChar )(code & 0xff); + return 4; + } + else { + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )(code & 0xff); + return 2; + } +} + +static int +utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0) { + p++; + *lower++ = '\0'; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp) += 2; + return 2; /* return byte length of converted char to lower */ + } + else { + int len; + len = EncLen_UTF16[*p]; + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*p]; + + if (*p == 0) { + int c, v; + + p++; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + c = *p; + v =
ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + + if ((v | ONIGENC_CTYPE_LOWER) != 0) { /* NOTE(review): OR-ing with ONIGENC_CTYPE_LOWER is non-zero for every v unless that constant is 0, so the final "return (v != 0 ...)" looks unreachable; '&' may have been intended - confirm against the macro definitions */ + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +utf16be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +OnigEncodingType OnigEncodingUTF16_BE = { + utf16be_mbc_enc_len, + "UTF-16BE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?'
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + utf16be_is_mbc_newline, + utf16be_mbc_to_code, + utf16be_code_to_mbclen, + utf16be_code_to_mbc, + utf16be_mbc_to_normalize, + utf16be_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + onigenc_unicode_is_code_ctype, + onigenc_unicode_get_ctype_code_range, + utf16be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c new file mode 100644 index 0000000..2248e49 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -0,0 +1,230 @@ +/********************************************************************** + utf16_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) +#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16le_code_to_mbclen(OnigCodePoint code) +{ + return (code > 0xffff ? 
4 : 2); +} + +static int +utf16le_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF16[*(p+1)]; +} + +static int +utf16le_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 1 < end) { + if (*p == 0x0a && *(p+1) == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16le_mbc_to_code(const UChar* p, const UChar* end) +{ + OnigCodePoint code; + UChar c0 = *p; + UChar c1 = *(p+1); + + if (UTF16_IS_SURROGATE_FIRST(c1)) { + code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) + + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) + + p[2]; + } + else { + code = c1 * 256 + p[0]; + } + return code; +} + +static int +utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) /* writes code to buf as UTF-16LE; returns the number of bytes written (2 or 4) */ +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int plane, high; + + plane = (code >> 16) - 1; /* a surrogate pair stores (plane - 1); utf16le_mbc_to_code adds the 1 back */ + high = (code & 0xff00) >> 8; + + *p++ = ((plane & 0x03) << 6) + (high >> 2); + *p++ = (plane >> 2) + 0xd8; + *p++ = (UChar )(code & 0xff); + *p = (high & 0x03) + 0xdc; /* keep both low bits of the high byte; a 0x02 mask dropped bit 0 */ + return 4; + } + else { + *p++ = (UChar )(code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + return 2; + } +} + +static int +utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + const UChar* p = *pp; + + if (*(p+1) == 0) { + *(lower+1) = '\0'; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp) += 2; + return 2; /* return byte length of converted char to lower */ + } + else { + int len = EncLen_UTF16[*(p+1)]; + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} +
+static int +utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*(p+1)]; + + if (*(p+1) == 0) { + int c, v; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + if ((v | ONIGENC_CTYPE_LOWER) != 0) { /* NOTE(review): OR-ing with ONIGENC_CTYPE_LOWER is non-zero for every v unless that constant is 0, so the final "return (v != 0 ...)" looks unreachable; '&' may have been intended - confirm against the macro definitions */ + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +utf16le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +OnigEncodingType OnigEncodingUTF16_LE = { + utf16le_mbc_enc_len, + "UTF-16LE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?'
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + utf16le_is_mbc_newline, + utf16le_mbc_to_code, + utf16le_code_to_mbclen, + utf16le_code_to_mbc, + utf16le_mbc_to_normalize, + utf16le_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + onigenc_unicode_is_code_ctype, + onigenc_unicode_get_ctype_code_range, + utf16le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c new file mode 100644 index 0000000..75133ca --- /dev/null +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -0,0 +1,187 @@ +/********************************************************************** + utf32_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32be_mbc_enc_len(const UChar* p) +{ + return 4; +} + +static int +utf32be_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 3 < end) { + if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+3) == 0x0d || *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32be_mbc_to_code(const UChar* p, const UChar* end) +{ + return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +} + +static int +utf32be_code_to_mbclen(OnigCodePoint code) +{ + return 4; +} + +static int +utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar )((code & 0xff000000) >>24); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar ) (code & 0xff); + return 4; +} + +static int +utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + const UChar* p = *pp; + + if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { + p += 3; + *lower++ = '\0'; + *lower++ = '\0'; + *lower++ = '\0'; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + 
!ONIGENC_IS_MBC_ASCII(p))) { + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp) += 4; + return 4; /* return byte length of converted char to lower */ + } + else { + int len = 4; + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { + int c, v; + + p += 3; + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + if ((v | ONIGENC_CTYPE_LOWER) != 0) { /* NOTE(review): OR-ing with ONIGENC_CTYPE_LOWER is non-zero for every v unless that constant is 0, so the final "return (v != 0 ...)" looks unreachable; '&' may have been intended - confirm against the macro definitions */ + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +utf32be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +OnigEncodingType OnigEncodingUTF32_BE = { + utf32be_mbc_enc_len, + "UTF-32BE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?'
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + utf32be_is_mbc_newline, + utf32be_mbc_to_code, + utf32be_code_to_mbclen, + utf32be_code_to_mbc, + utf32be_mbc_to_normalize, + utf32be_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + onigenc_unicode_is_code_ctype, + onigenc_unicode_get_ctype_code_range, + utf32be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c new file mode 100644 index 0000000..21dca10 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -0,0 +1,185 @@ +/********************************************************************** + utf32_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32le_mbc_enc_len(const UChar* p) +{ + return 4; +} + +static int +utf32le_is_mbc_newline(const UChar* p, const UChar* end) /* returns 1 when the 4 bytes at p encode a line terminator in UTF-32LE, otherwise 0 */ +{ + if (p + 3 < end) { + if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 + && *(p+2) == 0x00 && *(p+3) == 0x00) /* dereference p+2: the byte must be tested, not the pointer value */ + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32le_mbc_to_code(const UChar* p, const UChar* end) +{ + return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +} + +static int +utf32le_code_to_mbclen(OnigCodePoint code) +{ + return 4; +} + +static int +utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar ) (code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff000000) >>24); + return 4; +} + +static int +utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + const UChar* p = *pp; + + if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + *lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); +
} + else { + *lower++ = *p; + } + *lower++ = '\0'; + *lower++ = '\0'; + *lower = '\0'; + + (*pp) += 4; + return 4; /* return byte length of converted char to lower */ + } + else { + int len = 4; + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { + int c, v; + + if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, + (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); + if ((v | ONIGENC_CTYPE_LOWER) != 0) { /* NOTE(review): OR-ing with ONIGENC_CTYPE_LOWER is non-zero for every v unless that constant is 0, so the final "return (v != 0 ...)" looks unreachable; '&' may have been intended - confirm against the macro definitions */ + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + } + + return FALSE; +} + +static UChar* +utf32le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +OnigEncodingType OnigEncodingUTF32_LE = { + utf32le_mbc_enc_len, + "UTF-32LE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?'
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + utf32le_is_mbc_newline, + utf32le_mbc_to_code, + utf32le_code_to_mbclen, + utf32le_code_to_mbc, + utf32le_mbc_to_normalize, + utf32le_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + onigenc_unicode_is_code_ctype, + onigenc_unicode_get_ctype_code_range, + utf32le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c new file mode 100644 index 0000000..c7481d7 --- /dev/null +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -0,0 +1,3730 @@ +/********************************************************************** + utf8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static const int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + +static int +utf8_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF8[*p]; +} + +static int +utf8_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (*p == 0x0d) return 1; + if (p + 1 < 
end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + +static OnigCodePoint +utf8_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_UTF8, p); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +utf8_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) { + if (code <= 0xff && code >= 0xfe) + return 1; + return 2; + } + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; +} + +#if 0 +static int +utf8_code_to_mbc_first(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) + return code; + else { + if ((code & 0xfffff800) == 0) + return ((code>>6)& 0x1f) | 0xc0; + else if ((code & 0xffff0000) == 0) + return ((code>>12) & 0x0f) | 0xe0; + else if ((code & 0xffe00000) == 0) + return ((code>>18) & 0x07) | 0xf0; + else if ((code & 0xfc000000) == 0) + return ((code>>24) & 0x03) | 0xf8; + else if ((code & 0x80000000) == 0) + return ((code>>30) & 0x01) | 0xfc; + else { + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + } + } +} +#endif + +static int +utf8_code_to_mbc(OnigCodePoint code, UChar *buf) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 
0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return p - buf; + } +} + +static int +utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + int len; + + if (*p == 195) { /* 195 == '\303' */ + int c = *(p + 1); + if (c >= 128) { + if (c <= (UChar )'\236' && /* upper */ + (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { + if (c != (UChar )'\227') { + *lower++ = *p; + *lower = (UChar )(c + 32); + (*pp) += 2; + return 2; + } + } + } + } + + len = enc_len(ONIG_ENCODING_UTF8, p); + if (lower != p) { + int i; + for (i = 
0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + } + else { + (*pp) += enc_len(ONIG_ENCODING_UTF8, p); + + if (*p == 195) { /* 195 == '\303' */ + int c = *(p + 1); + if (c >= 128) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { + if (c <= (UChar )'\236') { /* upper */ + if (c == (UChar )'\227') return FALSE; + return TRUE; + } + else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */ + if (c == (UChar )'\267') return FALSE; + return TRUE; + } + } + } + } + } + + return FALSE; +} + + +static const OnigCodePoint EmptyRange[] = { 0 }; + +static const OnigCodePoint SBAlnum[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a +}; + +static const OnigCodePoint MBAlnum[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 411, +#else + 6, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 
0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 
0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x1371, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 
0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 
0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlnum */ + +static const OnigCodePoint SBAlpha[] = { + 2, + 0x0041, 0x005a, + 0x0061, 0x007a +}; + +static const OnigCodePoint MBAlpha[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 394, +#else + 6, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0981, 0x0983, + 0x0985, 
0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 
0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 
0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 
0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlpha */ + +static const OnigCodePoint SBBlank[] = { + 2, + 0x0009, 0x0009, + 0x0020, 0x0020 +}; + +static const OnigCodePoint MBBlank[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 7, +#else + 1, +#endif + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBBlank */ + +static const OnigCodePoint SBCntrl[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x007f +}; + +static const OnigCodePoint MBCntrl[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 18, +#else + 2, +#endif + 0x0080, 0x009f, + 0x00ad, 0x00ad +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBCntrl */ + +static const OnigCodePoint SBDigit[] = { + 1, + 0x0030, 0x0039 +}; + +static const OnigCodePoint MBDigit[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 22, +#else + 0 +#endif +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be7, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1369, 0x1371, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +#endif /* 
USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBDigit */ + +static const OnigCodePoint SBGraph[] = { + 1, + 0x0021, 0x007e +}; + +static const OnigCodePoint MBGraph[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 404, +#else + 1, +#endif + 0x00a1, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 
0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 
0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x2054, + 0x2057, 0x2057, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 
0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBGraph */ + +static const OnigCodePoint SBLower[] = { + 1, + 0x0061, 0x007a +}; + +static const OnigCodePoint MBLower[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 423, +#else + 5, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 
0x00ff +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 
0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0236, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 
0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d6b, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 
0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213d, 0x213d, + 0x2146, 0x2149, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a3, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBLower */ + +static const OnigCodePoint SBPrint[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x007e +}; + +static const OnigCodePoint MBPrint[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 403, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 
0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 
0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 
0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2054, + 0x2057, 0x2057, + 0x205f, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 
0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPrint */ + +static const OnigCodePoint SBPunct[] = { + 9, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d +}; /* end of SBPunct */ + +static const OnigCodePoint MBPunct[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 77, +#else + 5, +#endif + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05f3, 0x05f4, 
+ 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x2054, + 0x2057, 0x2057, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPunct */ + +static const OnigCodePoint SBSpace[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x0020 +}; + +static const OnigCodePoint MBSpace[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 9, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBSpace */ + +static const OnigCodePoint SBUpper[] = { + 1, + 0x0041, 0x005a +}; + +static const OnigCodePoint MBUpper[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 420, +#else + 2, +#endif + 0x00c0, 0x00d6, + 0x00d8, 0x00de +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 
0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 
0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x0400, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 
0x04f4, 0x04f4, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 
0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBUpper */ + +static const OnigCodePoint SBXDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; + +static const OnigCodePoint SBASCII[] = { + 1, + 0x0000, 0x007f +}; + +static const OnigCodePoint SBWord[] = { + 4, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a +}; + +static const OnigCodePoint MBWord[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 432, +#else + 
8, +#endif + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, +#ifndef USE_UNICODE_FULL_RANGE_CTYPE + 0x00f8, 0x7fffffff +#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ + 0x00f8, 0x0236, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 
0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 
0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 
0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff65, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 
0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBWord */ + + /* Stores through sbr and mbr the SB and MB code range tables that correspond to ctype (XDIGIT and ASCII get EmptyRange as the MB table). Returns 0 on success, or ONIGENCERR_TYPE_BUG when ctype matches no case. */ +static int +utf8_get_ctype_code_range(int ctype, + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) +{ /* CR_SET stores both tables; CR_SB_SET stores sbl and uses EmptyRange for mbr */ +#define CR_SET(sbl,mbl) do { \ + *sbr = sbl; \ + *mbr = mbl; \ +} while (0) + +#define CR_SB_SET(sbl) do { \ + *sbr = sbl; \ + *mbr = EmptyRange; \ +} while (0) + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + CR_SET(SBAlpha, MBAlpha); + break; + case ONIGENC_CTYPE_BLANK: + CR_SET(SBBlank, MBBlank); + break; + case ONIGENC_CTYPE_CNTRL: + CR_SET(SBCntrl, MBCntrl); + break; + case ONIGENC_CTYPE_DIGIT: + CR_SET(SBDigit, MBDigit); + break; + case ONIGENC_CTYPE_GRAPH: + CR_SET(SBGraph, MBGraph); + break; + case ONIGENC_CTYPE_LOWER: + CR_SET(SBLower, MBLower); + break; + case ONIGENC_CTYPE_PRINT: + CR_SET(SBPrint, MBPrint); + break; + case ONIGENC_CTYPE_PUNCT: + CR_SET(SBPunct, MBPunct); + break; + case ONIGENC_CTYPE_SPACE: + CR_SET(SBSpace, MBSpace); + break; + case ONIGENC_CTYPE_UPPER: + CR_SET(SBUpper, MBUpper); + break; + case ONIGENC_CTYPE_XDIGIT: + CR_SB_SET(SBXDigit); + break; + case ONIGENC_CTYPE_WORD: + CR_SET(SBWord, MBWord); + break; + case ONIGENC_CTYPE_ASCII: + CR_SB_SET(SBASCII); + break; + case ONIGENC_CTYPE_ALNUM: + CR_SET(SBAlnum, MBAlnum); + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + + return 0; +} + /* Reports whether code belongs to the character class ctype. Codes below 256 are answered by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. With USE_UNICODE_FULL_RANGE_CTYPE, other codes are looked up in the MB table chosen by ctype (XDIGIT, ASCII and NEWLINE yield FALSE); without it, they match only when ctype has the ONIGENC_CTYPE_WORD bit set. NOTE(review): the default case returns ONIGENCERR_TYPE_BUG from what otherwise reads as a TRUE/FALSE predicate; confirm how callers treat that value. */ +static int +utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + const OnigCodePoint *range; +#endif + + if (code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + range = MBAlpha; + break; + case ONIGENC_CTYPE_BLANK: + range = MBBlank; + break; + case ONIGENC_CTYPE_CNTRL: + range = MBCntrl; + break; + case ONIGENC_CTYPE_DIGIT: + range = MBDigit; + break; + case ONIGENC_CTYPE_GRAPH: + range = MBGraph; + break; + case ONIGENC_CTYPE_LOWER: + range = MBLower; + break; + case
ONIGENC_CTYPE_PRINT: + range = MBPrint; + break; + case ONIGENC_CTYPE_PUNCT: + range = MBPunct; + break; + case ONIGENC_CTYPE_SPACE: + range = MBSpace; + break; + case ONIGENC_CTYPE_UPPER: + range = MBUpper; + break; + case ONIGENC_CTYPE_XDIGIT: + return FALSE; + break; + case ONIGENC_CTYPE_WORD: + range = MBWord; + break; + case ONIGENC_CTYPE_ASCII: + return FALSE; + break; + case ONIGENC_CTYPE_ALNUM: + range = MBAlnum; + break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + /* range now points at the MB table selected by the switch */ + return onig_is_in_code_range((UChar* )range, code); + +#else + + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { /* when USE_INVALID_CODE_SCHEME is defined, TRUE is returned only for code <= VALID_CODE_LIMIT */ +#ifdef USE_INVALID_CODE_SCHEME + if (code <= VALID_CODE_LIMIT) +#endif + return TRUE; + } +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ + + return FALSE; +} + /* Returns s itself when s <= start. Otherwise moves p backward from s while utf8_islead(*p) is false and p is still above start, and returns the resulting position. */ +static UChar* +utf8_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + /* OnigEncodingType instance for UTF-8; the inline comments name the leading fields, and its later entries include utf8_is_code_ctype, utf8_get_ctype_code_range and utf8_left_adjust_char_head defined above. */ +OnigEncodingType OnigEncodingUTF8 = { + utf8_mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?'
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + utf8_is_mbc_newline, + utf8_mbc_to_code, + utf8_code_to_mbclen, + utf8_code_to_mbc, + utf8_mbc_to_normalize, + utf8_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + utf8_is_code_ctype, + utf8_get_ctype_code_range, + utf8_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html new file mode 100755 index 0000000..d55f1cc --- /dev/null +++ b/ext/mbstring/oniguruma/index.html @@ -0,0 +1,187 @@ +<html> +<head> + <meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis"> + <title>Oniguruma</title> +</head> +<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969"> + +<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) + +<p> +(c) K.Kosako, updated at: 2007/08/16 +</p> + +<dl> +<font color="orange"> +<dt><b>What's new</b> +</font> +<ul> +<li>2007/08/16: Version 4.7.1 released.</li> +<li>2007/07/14: Version 5.9.0 released.</li> +<li>2007/06/20: Version 2.5.9 released.</li> +<li>2007/06/20: Maintainer of 2.x was changed.</li> +</ul> +</dl> +<hr> + +<p> +Oniguruma is a regular expressions library.<br> +The characteristics of this library is that different character encoding +<br>for every regular expression object can be specified.
+<br>(supported APIs: GNU regex, POSIX and Oniguruma native) +</p> + +<dl> +<dt><b>Supported character encodings:</b><br> +ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br> +EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br> +Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br> +ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br> +ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br> +ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br> +<font color="orange"> +(GB18030 encoding was contributed by KUBO Takehiro)<br> +(CP1251 encoding was contributed by Byte) +</font> +</p> +</dl> + +<hr> + +<dt><b>License:</b> BSD license. + +<dl> +<dt><b>Platform:</b> +<ul> +<li> Unix (including Mac OS X) +<li> Cygwin +<li> Win32 +</ul> + +<br> + +<dt><b>Download:</b> +<ul> +<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14) <a href="HISTORY_5X.txt">Change Log</a> +<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04) +<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27) +<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a> +<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18) +<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a> +</ul> + +<br> +<font color="red"> +Maintainer of 2.x was changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br> +About 2.x, please contact him.<br> +</font> +* 5.x supports Unicode Property/Script.<br> +* 2.x supports Ruby1.6/1.8.<br> + +<br> +<dt><b>Documents:</b> (version 5.9.0) +<ul> + <li> <a href="doc/RE.txt">Regular Expressions</a> + <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a> + <li> <a href="doc/API.txt">Oniguruma API</a> + <a href="doc/API.ja.txt">(Japanese: EUC-JP)</a> +</ul> + +<br> +<dt><b>Sample Programs:</b> +<ul> + <li><a href="sample/simple.c">example of the minimum</a> + <li><a href="sample/sql.c">example of
the variable syntax and meta character (SQL-like pattern match)</a> +</ul> + +<br> +<dt><b>Site Links:</b> +<ul> +<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a> +<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page) +</ul> + +<br> +<dt><b>Links:</b> +<ul> +<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page) +<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page) +<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page) +<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a> +<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page) +<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page) +<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page) +<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog) +<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a> +<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page) +<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a> +<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page) +<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page) +<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page) +<li> <a href="http://monaos.org/">Mona OS</a> +<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page) +<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page) +<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page) +<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page) +<li> <a 
href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page) +<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a> +<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a> +<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page) +<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page) +<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding +<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page) +<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page) +<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page) +<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page) +<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page) +<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page) +<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page) +<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. 
Products (Mac OS X)</a> (Japanese page) +<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a> +<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a> +<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a> +<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a> +<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page) +</ul> + +<br> +<dt><b>References:</b> +<ul> +<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page) +<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a> +<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a> +<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a> +<li> <a href="http://regex.info/">Mastering Regular Expressions</a> +<li> <a href="http://www.unicode.org/">Unicode Home Page</a> +<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page) +<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page) +</ul> + +<br> +</dl> +<p> +and I'm thankful to Akinori MUSHA. 
+</p> + +<hr> +<dl> +<dt><b>Other Libraries:</b> +<ul> +<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a> +<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a> +<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a> +<li> <a href="http://www.pcre.org/">PCRE</a> +<li> <a href="http://re2c.org/">re2c</a> +<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a> +<li> <a href="http://laurikari.net/tre/">TRE</a> +<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a> +<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a> +<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a> +<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a> +</ul> +</dl> + +<hr> +<a href="../">Back to Home</a> +</body> +</html> diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h new file mode 100644 index 0000000..d9b1419 --- /dev/null +++ b/ext/mbstring/oniguruma/onigcmpt200.h @@ -0,0 +1,310 @@ +/********************************************************************** + + onigcmpt200.h - Oniguruma (regular expression library) + + Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp) + +**********************************************************************/ +#ifndef ONIGCMPT200_H +#define ONIGCMPT200_H + +/* constants */ +#define REG_MAX_ERROR_MESSAGE_LEN ONIG_MAX_ERROR_MESSAGE_LEN + +#define RegCharEncoding OnigEncoding + +#define REG_ENCODING_ASCII ONIG_ENCODING_ASCII +#define REG_ENCODING_ISO_8859_1 ONIG_ENCODING_ISO_8859_1 +#define REG_ENCODING_ISO_8859_15 ONIG_ENCODING_ISO_8859_15 +#define REG_ENCODING_UTF8 ONIG_ENCODING_UTF8 +#define REG_ENCODING_EUC_JP ONIG_ENCODING_EUC_JP +#define REG_ENCODING_SJIS ONIG_ENCODING_SJIS +#define REG_ENCODING_BIG5 ONIG_ENCODING_BIG5 +#define REG_ENCODING_UNDEF ONIG_ENCODING_UNDEF + +/* Don't use REGCODE_XXXX. 
(obsoleted) */ +#define REGCODE_UNDEF REG_ENCODING_UNDEF +#define REGCODE_ASCII REG_ENCODING_ASCII +#define REGCODE_UTF8 REG_ENCODING_UTF8 +#define REGCODE_EUCJP REG_ENCODING_EUC_JP +#define REGCODE_SJIS REG_ENCODING_SJIS + +/* Don't use MBCTYPE_XXXX. (obsoleted) */ +#define MBCTYPE_ASCII RE_MBCTYPE_ASCII +#define MBCTYPE_EUC RE_MBCTYPE_EUC +#define MBCTYPE_SJIS RE_MBCTYPE_SJIS +#define MBCTYPE_UTF8 RE_MBCTYPE_UTF8 + +typedef unsigned char* RegTransTableType; +#define RegOptionType OnigOptionType +#define RegDistance OnigDistance + +#define REG_OPTION_DEFAULT ONIG_OPTION_DEFAULT + +/* options */ +#define REG_OPTION_NONE ONIG_OPTION_NONE +#define REG_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE +#define REG_OPTION_MULTILINE ONIG_OPTION_MULTILINE +#define REG_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE +#define REG_OPTION_EXTEND ONIG_OPTION_EXTEND +#define REG_OPTION_FIND_LONGEST ONIG_OPTION_FIND_LONGEST +#define REG_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY +#define REG_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE +#define REG_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP +#define REG_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP +#define REG_OPTION_NOTBOL ONIG_OPTION_NOTBOL +#define REG_OPTION_NOTEOL ONIG_OPTION_NOTEOL +#define REG_OPTION_POSIX_REGION ONIG_OPTION_POSIX_REGION + +#define REG_OPTION_ON ONIG_OPTION_ON +#define REG_OPTION_OFF ONIG_OPTION_OFF +#define IS_REG_OPTION_ON ONIG_IS_OPTION_ON + +/* syntax */ +#define RegSyntaxType OnigSyntaxType + +#define RegSyntaxPosixBasic OnigSyntaxPosixBasic +#define RegSyntaxPosixExtended OnigSyntaxPosixExtended +#define RegSyntaxEmacs OnigSyntaxEmacs +#define RegSyntaxGrep OnigSyntaxGrep +#define RegSyntaxGnuRegex OnigSyntaxGnuRegex +#define RegSyntaxJava OnigSyntaxJava +#define RegSyntaxPerl OnigSyntaxPerl +#define RegSyntaxRuby OnigSyntaxRuby + +#define REG_SYNTAX_POSIX_BASIC ONIG_SYNTAX_POSIX_BASIC +#define REG_SYNTAX_POSIX_EXTENDED ONIG_SYNTAX_POSIX_EXTENDED +#define REG_SYNTAX_EMACS 
ONIG_SYNTAX_EMACS +#define REG_SYNTAX_GREP ONIG_SYNTAX_GREP +#define REG_SYNTAX_GNU_REGEX ONIG_SYNTAX_GNU_REGEX +#define REG_SYNTAX_JAVA ONIG_SYNTAX_JAVA +#define REG_SYNTAX_PERL ONIG_SYNTAX_PERL +#define REG_SYNTAX_RUBY ONIG_SYNTAX_RUBY + +#define REG_SYNTAX_DEFAULT ONIG_SYNTAX_DEFAULT +#define RegDefaultSyntax OnigDefaultSyntax + +/* syntax (operators) */ +#define REG_SYN_OP_VARIABLE_META_CHARACTERS \ + ONIG_SYN_OP_VARIABLE_META_CHARACTERS +#define REG_SYN_OP_DOT_ANYCHAR \ + ONIG_SYN_OP_DOT_ANYCHAR +#define REG_SYN_OP_ASTERISK_ZERO_INF \ + ONIG_SYN_OP_ASTERISK_ZERO_INF +#define REG_SYN_OP_ESC_ASTERISK_ZERO_INF \ + ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF +#define REG_SYN_OP_PLUS_ONE_INF \ + ONIG_SYN_OP_PLUS_ONE_INF +#define REG_SYN_OP_ESC_PLUS_ONE_INF \ + ONIG_SYN_OP_ESC_PLUS_ONE_INF +#define REG_SYN_OP_QMARK_ZERO_ONE \ + ONIG_SYN_OP_QMARK_ZERO_ONE +#define REG_SYN_OP_ESC_QMARK_ZERO_ONE \ + ONIG_SYN_OP_ESC_QMARK_ZERO_ONE +#define REG_SYN_OP_BRACE_INTERVAL \ + ONIG_SYN_OP_BRACE_INTERVAL +#define REG_SYN_OP_ESC_BRACE_INTERVAL \ + ONIG_SYN_OP_ESC_BRACE_INTERVAL +#define REG_SYN_OP_VBAR_ALT \ + ONIG_SYN_OP_VBAR_ALT +#define REG_SYN_OP_ESC_VBAR_ALT \ + ONIG_SYN_OP_ESC_VBAR_ALT +#define REG_SYN_OP_LPAREN_SUBEXP \ + ONIG_SYN_OP_LPAREN_SUBEXP +#define REG_SYN_OP_ESC_LPAREN_SUBEXP \ + ONIG_SYN_OP_ESC_LPAREN_SUBEXP +#define REG_SYN_OP_ESC_AZ_BUF_ANCHOR \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR +#define REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR \ + ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR +#define REG_SYN_OP_DECIMAL_BACKREF \ + ONIG_SYN_OP_DECIMAL_BACKREF +#define REG_SYN_OP_BRACKET_CC \ + ONIG_SYN_OP_BRACKET_CC +#define REG_SYN_OP_ESC_W_WORD \ + ONIG_SYN_OP_ESC_W_WORD +#define REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END \ + ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END +#define REG_SYN_OP_ESC_B_WORD_BOUND \ + ONIG_SYN_OP_ESC_B_WORD_BOUND +#define REG_SYN_OP_ESC_S_WHITE_SPACE \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE +#define REG_SYN_OP_ESC_D_DIGIT \ + ONIG_SYN_OP_ESC_D_DIGIT +#define REG_SYN_OP_LINE_ANCHOR \ + 
ONIG_SYN_OP_LINE_ANCHOR +#define REG_SYN_OP_POSIX_BRACKET \ + ONIG_SYN_OP_POSIX_BRACKET +#define REG_SYN_OP_QMARK_NON_GREEDY \ + ONIG_SYN_OP_QMARK_NON_GREEDY +#define REG_SYN_OP_ESC_CONTROL_CHARS \ + ONIG_SYN_OP_ESC_CONTROL_CHARS +#define REG_SYN_OP_ESC_C_CONTROL \ + ONIG_SYN_OP_ESC_C_CONTROL +#define REG_SYN_OP_ESC_OCTAL3 \ + ONIG_SYN_OP_ESC_OCTAL3 +#define REG_SYN_OP_ESC_X_HEX2 \ + ONIG_SYN_OP_ESC_X_HEX2 +#define REG_SYN_OP_ESC_X_BRACE_HEX8 \ + ONIG_SYN_OP_ESC_X_BRACE_HEX8 + +#define REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE \ + ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE +#define REG_SYN_OP2_QMARK_GROUP_EFFECT \ + ONIG_SYN_OP2_QMARK_GROUP_EFFECT +#define REG_SYN_OP2_OPTION_PERL \ + ONIG_SYN_OP2_OPTION_PERL +#define REG_SYN_OP2_OPTION_RUBY \ + ONIG_SYN_OP2_OPTION_RUBY +#define REG_SYN_OP2_PLUS_POSSESSIVE_REPEAT \ + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT +#define REG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL \ + ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL +#define REG_SYN_OP2_CCLASS_SET_OP \ + ONIG_SYN_OP2_CCLASS_SET_OP +#define REG_SYN_OP2_QMARK_LT_NAMED_GROUP \ + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP +#define REG_SYN_OP2_ESC_K_NAMED_BACKREF \ + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF +#define REG_SYN_OP2_ESC_G_SUBEXP_CALL \ + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL +#define REG_SYN_OP2_ATMARK_CAPTURE_HISTORY \ + ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY +#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL \ + ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL +#define REG_SYN_OP2_ESC_CAPITAL_M_BAR_META \ + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META +#define REG_SYN_OP2_ESC_V_VTAB \ + ONIG_SYN_OP2_ESC_V_VTAB +#define REG_SYN_OP2_ESC_U_HEX4 \ + ONIG_SYN_OP2_ESC_U_HEX4 +#define REG_SYN_OP2_ESC_GNU_BUF_ANCHOR \ + ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR + +#define REG_SYN_CONTEXT_INDEP_ANCHORS \ + ONIG_SYN_CONTEXT_INDEP_ANCHORS +#define REG_SYN_CONTEXT_INDEP_REPEAT_OPS \ + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS +#define REG_SYN_CONTEXT_INVALID_REPEAT_OPS \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS +#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP \ + 
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP +#define REG_SYN_ALLOW_INVALID_INTERVAL \ + ONIG_SYN_ALLOW_INVALID_INTERVAL +#define REG_SYN_STRICT_CHECK_BACKREF \ + ONIG_SYN_STRICT_CHECK_BACKREF +#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND \ + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND +#define REG_SYN_CAPTURE_ONLY_NAMED_GROUP \ + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP +#define REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME \ + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME + +#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC \ + ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC +#define REG_SYN_BACKSLASH_ESCAPE_IN_CC \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC +#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC \ + ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC +#define REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC \ + ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC +#define REG_SYN_WARN_CC_OP_NOT_ESCAPED \ + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED +#define REG_SYN_WARN_REDUNDANT_NESTED_REPEAT \ + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT + +/* meta character specifiers (regex_set_meta_char()) */ +#define REG_META_CHAR_ESCAPE ONIG_META_CHAR_ESCAPE +#define REG_META_CHAR_ANYCHAR ONIG_META_CHAR_ANYCHAR +#define REG_META_CHAR_ANYTIME ONIG_META_CHAR_ANYTIME +#define REG_META_CHAR_ZERO_OR_ONE_TIME ONIG_META_CHAR_ZERO_OR_ONE_TIME +#define REG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ONE_OR_MORE_TIME +#define REG_META_CHAR_ANYCHAR_ANYTIME ONIG_META_CHAR_ANYCHAR_ANYTIME + +#define REG_INEFFECTIVE_META_CHAR ONIG_INEFFECTIVE_META_CHAR + +/* error codes */ +#define REG_IS_PATTERN_ERROR ONIG_IS_PATTERN_ERROR +/* normal return */ +#define REG_NORMAL ONIG_NORMAL +#define REG_MISMATCH ONIG_MISMATCH +#define REG_NO_SUPPORT_CONFIG ONIG_NO_SUPPORT_CONFIG +/* internal error */ +#define REGERR_MEMORY ONIGERR_MEMORY +#define REGERR_MATCH_STACK_LIMIT_OVER ONIGERR_MATCH_STACK_LIMIT_OVER +#define REGERR_TYPE_BUG ONIGERR_TYPE_BUG +#define REGERR_PARSER_BUG ONIGERR_PARSER_BUG +#define REGERR_STACK_BUG ONIGERR_STACK_BUG +#define REGERR_UNDEFINED_BYTECODE ONIGERR_UNDEFINED_BYTECODE +#define 
REGERR_UNEXPECTED_BYTECODE ONIGERR_UNEXPECTED_BYTECODE +#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED \ + ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED +#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR \ + ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR +/* general error */ +#define REGERR_INVALID_ARGUMENT ONIGERR_INVALID_ARGUMENT +/* errors related to thread */ +#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT \ + ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define REG_MAX_CAPTURE_HISTORY_GROUP ONIG_MAX_CAPTURE_HISTORY_GROUP +#define REG_IS_CAPTURE_HISTORY_GROUP ONIG_IS_CAPTURE_HISTORY_GROUP + +#define REG_REGION_NOTPOS ONIG_REGION_NOTPOS + +#define RegRegion OnigRegion +#define RegErrorInfo OnigErrorInfo +#define RegRepeatRange OnigRepeatRange + +#define RegWarnFunc OnigWarnFunc +#define regex_null_warn onig_null_warn +#define REG_NULL_WARN ONIG_NULL_WARN + +/* regex_t state */ +#define REG_STATE_NORMAL ONIG_STATE_NORMAL +#define REG_STATE_SEARCHING ONIG_STATE_SEARCHING +#define REG_STATE_COMPILING ONIG_STATE_COMPILING +#define REG_STATE_MODIFY ONIG_STATE_MODIFY + +#define REG_STATE ONIG_STATE + +/* Oniguruma Native API */ +#define regex_init onig_init +#define regex_error_code_to_str onig_error_code_to_str +#define regex_set_warn_func onig_set_warn_func +#define regex_set_verb_warn_func onig_set_verb_warn_func +#define regex_new onig_new +#define regex_free onig_free +#define regex_recompile onig_recompile +#define regex_search onig_search +#define regex_match onig_match +#define regex_region_new onig_region_new +#define regex_region_free onig_region_free +#define regex_region_copy onig_region_copy +#define regex_region_clear onig_region_clear +#define regex_region_resize onig_region_resize +#define regex_name_to_group_numbers onig_name_to_group_numbers +#define regex_name_to_backref_number onig_name_to_backref_number +#define regex_foreach_name onig_foreach_name +#define regex_number_of_names 
onig_number_of_names +#define regex_get_encoding onig_get_encoding +#define regex_get_options onig_get_options +#define regex_get_syntax onig_get_syntax +#define regex_set_default_syntax onig_set_default_syntax +#define regex_copy_syntax onig_copy_syntax +#define regex_set_meta_char onig_set_meta_char +#define regex_end onig_end +#define regex_version onig_version + +/* encoding API */ +#define enc_get_prev_char_head onigenc_get_prev_char_head +#define enc_get_left_adjust_char_head onigenc_get_left_adjust_char_head +#define enc_get_right_adjust_char_head onigenc_get_right_adjust_char_head +/* obsoleted API */ +#define regex_get_prev_char_head onigenc_get_prev_char_head +#define regex_get_left_adjust_char_head onigenc_get_left_adjust_char_head +#define regex_get_right_adjust_char_head onigenc_get_right_adjust_char_head + +#endif /* ONIGCMPT200_H */ diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h new file mode 100644 index 0000000..3da9f23 --- /dev/null +++ b/ext/mbstring/oniguruma/oniggnu.h @@ -0,0 +1,85 @@ +#ifndef ONIGGNU_H +#define ONIGGNU_H +/********************************************************************** + oniggnu.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "oniguruma.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define RE_MBCTYPE_ASCII 0 +#define RE_MBCTYPE_EUC 1 +#define RE_MBCTYPE_SJIS 2 +#define RE_MBCTYPE_UTF8 3 + +/* GNU regex options */ +#ifndef RE_NREGS +#define RE_NREGS ONIG_NREGION +#endif + +#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE +#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND +#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE +#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE +#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST +#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) +#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY +#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE +#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP +#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP + + +ONIG_EXTERN +void re_mbcinit P_((int)); +ONIG_EXTERN +int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +void re_free_pattern P_((struct re_pattern_buffer*)); +ONIG_EXTERN +int re_adjust_startpos 
P_((struct re_pattern_buffer*, const char*, int, int, int)); +ONIG_EXTERN +int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); +ONIG_EXTERN +int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); +ONIG_EXTERN +void re_set_casetable P_((const char*)); +ONIG_EXTERN +void re_free_registers P_((struct re_registers*)); +ONIG_EXTERN +int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGGNU_H */ diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h new file mode 100644 index 0000000..cfeb88a --- /dev/null +++ b/ext/mbstring/oniguruma/onigposix.h @@ -0,0 +1,169 @@ +#ifndef ONIGPOSIX_H +#define ONIGPOSIX_H +/********************************************************************** + onigposix.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* options */ +#define REG_ICASE (1<<0) +#define REG_NEWLINE (1<<1) +#define REG_NOTBOL (1<<2) +#define REG_NOTEOL (1<<3) +#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */ +#define REG_NOSUB (1<<5) + +/* POSIX error codes */ +#define REG_NOMATCH 1 +#define REG_BADPAT 2 +#define REG_ECOLLATE 3 +#define REG_ECTYPE 4 +#define REG_EESCAPE 5 +#define REG_ESUBREG 6 +#define REG_EBRACK 7 +#define REG_EPAREN 8 +#define REG_EBRACE 9 +#define REG_BADBR 10 +#define REG_ERANGE 11 +#define REG_ESPACE 12 +#define REG_BADRPT 13 + +/* extended error codes */ +#define REG_EONIG_INTERNAL 14 +#define REG_EONIG_BADWC 15 +#define REG_EONIG_BADARG 16 +#define REG_EONIG_THREAD 17 + +/* character encodings (for reg_set_encoding()) */ +#define REG_POSIX_ENCODING_ASCII 0 +#define REG_POSIX_ENCODING_EUC_JP 1 +#define REG_POSIX_ENCODING_SJIS 2 +#define REG_POSIX_ENCODING_UTF8 3 +#define REG_POSIX_ENCODING_UTF16_BE 4 +#define REG_POSIX_ENCODING_UTF16_LE 5 + + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* POSIX regex_t */ +typedef struct { + void* onig; /* Oniguruma regex_t* */ + size_t re_nsub; + int comp_options; +} regex_t; + + +#ifndef P_ +#if defined(__STDC__) || defined(_WIN32) +# define P_(args) args +#else +# define P_(args) () +#endif +#endif + +#ifndef ONIG_EXTERN +#if defined(_WIN32) && !defined(__GNUC__) +#if 
defined(EXPORT) || defined(RUBY_EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +#ifndef ONIGURUMA_H +typedef unsigned int OnigOptionType; + +/* syntax */ +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) +/* default syntax */ +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; + +ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN const char* onig_version P_((void)); +ONIG_EXTERN const char* onig_copyright P_((void)); + +#endif /* ONIGURUMA_H */ + + +ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); +ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); +ONIG_EXTERN void regfree P_((regex_t* reg)); +ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t 
size)); + +/* extended API */ +ONIG_EXTERN void reg_set_encoding P_((int enc)); +ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); +ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); +ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGPOSIX_H */ diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h new file mode 100644 index 0000000..5196a3d --- /dev/null +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -0,0 +1,905 @@ +#ifndef ONIGURUMA_H +#define ONIGURUMA_H +/********************************************************************** + oniguruma.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "../php_onig_compat.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ONIGURUMA +#define ONIGURUMA_VERSION_MAJOR 4 +#define ONIGURUMA_VERSION_MINOR 7 +#define ONIGURUMA_VERSION_TEENY 1 + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifndef P_ +#if defined(__STDC__) || defined(_WIN32) +# define P_(args) args +#else +# define P_(args) () +#endif +#endif + +#ifndef PV_ +#ifdef HAVE_STDARG_PROTOTYPES +# define PV_(args) args +#else +# define PV_(args) () +#endif +#endif + +#ifndef ONIG_EXTERN +#if defined(_WIN32) && !defined(__GNUC__) +#if defined(EXPORT) || defined(RUBY_EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +/* PART: character encoding */ + +#ifndef ONIG_ESCAPE_UCHAR_COLLISION +#define UChar OnigUChar +#endif + +typedef unsigned char OnigUChar; +typedef unsigned long OnigCodePoint; +typedef unsigned int OnigDistance; + +#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + +/* ambiguous match flag */ +typedef unsigned int OnigAmbigType; + 
+ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag; + +#define ONIGENC_AMBIGUOUS_MATCH_NONE 0 +#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) +#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1) + +#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1) + +#define ONIGENC_AMBIGUOUS_MATCH_FULL \ + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ) +#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag + + +#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3 +#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4 + +/* code range */ +#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) +#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] +#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] + +typedef struct { + int len; + OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN]; +} OnigCompAmbigCodeItem; + +typedef struct { + int n; + OnigCodePoint code; + OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM]; +} OnigCompAmbigCodes; + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairAmbigCodes; + +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; + + +#if defined(RUBY_PLATFORM) && defined(M17N_H) + +#define ONIG_RUBY_M17N +typedef m17n_encoding* OnigEncoding; + +#else + +typedef struct { + int (*mbc_enc_len)(const OnigUChar* p); + const char* name; + int max_enc_len; + int min_enc_len; + OnigAmbigType support_ambig_flag; + OnigMetaCharTableType meta_char_table; + int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end); + OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end); + int (*code_to_mbclen)(OnigCodePoint code); + int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); + int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); + int (*is_mbc_ambiguous)(OnigAmbigType flag, 
const OnigUChar** pp, const OnigUChar* end); + int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs); + int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs); + int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); + int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]); + OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); + int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; +ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; +ONIG_EXTERN 
OnigEncodingType OnigEncodingKOI8; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; +ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; +ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) +#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) +#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) +#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) +#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) +#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) +#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) +#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) +#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) +#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) +#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) +#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) +#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) +#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) +#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) +#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) +#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) +#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) +#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) +#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE) +#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) +#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) +#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) +#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) +#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) +#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) +#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) +#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) +#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) + +#endif /* else RUBY && M17N */ + +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size */ +#define 
ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN + +/* character types */ +#define ONIGENC_CTYPE_NEWLINE (1<< 0) +#define ONIGENC_CTYPE_ALPHA (1<< 1) +#define ONIGENC_CTYPE_BLANK (1<< 2) +#define ONIGENC_CTYPE_CNTRL (1<< 3) +#define ONIGENC_CTYPE_DIGIT (1<< 4) +#define ONIGENC_CTYPE_GRAPH (1<< 5) +#define ONIGENC_CTYPE_LOWER (1<< 6) +#define ONIGENC_CTYPE_PRINT (1<< 7) +#define ONIGENC_CTYPE_PUNCT (1<< 8) +#define ONIGENC_CTYPE_SPACE (1<< 9) +#define ONIGENC_CTYPE_UPPER (1<<10) +#define ONIGENC_CTYPE_XDIGIT (1<<11) +#define ONIGENC_CTYPE_WORD (1<<12) +#define ONIGENC_CTYPE_ASCII (1<<13) +#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) + +#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#ifdef ONIG_RUBY_M17N + +#include <ctype.h> /* for isblank(), isgraph() */ + +#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ + onigenc_mbc_to_normalize(enc,flag,pp,end,buf) +#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ + onigenc_is_mbc_ambiguous(enc,flag,pp,end) + +#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + onigenc_is_allowed_reverse_match(enc, s, end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + onigenc_get_left_adjust_char_head(enc, start, s) +#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0 +#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0 +#define 
ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ + ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p)) +#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) +#define ONIGENC_MBC_MAXLEN_DIST(enc) \ + (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \ + : ONIG_INFINITE_DISTANCE) +#define ONIGENC_MBC_MINLEN(enc) 1 +#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code)) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf) + +#if 0 /* !! not supported !! */ +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) +#define ONIGENC_STEP_BACK(enc,start,s,n) +#endif + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \ + onigenc_is_code_ctype(enc,code,ctype) + +#ifdef isblank +# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code) +#else +# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t') +#endif +#ifdef isgraph +# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code) +#else +# define ONIGENC_IS_CODE_GRAPH(enc,code) \ + (isprint((int )code) && !isspace((int )code)) +#endif + +#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code) +#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code) +#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code) +#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code) +#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code) +#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code) +#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code) +#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code) +#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code) +#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code) + +ONIG_EXTERN +int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); +ONIG_EXTERN +int onigenc_code_to_mbc 
P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf)); +ONIG_EXTERN +int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf)); +ONIG_EXTERN +int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)); +ONIG_EXTERN +int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); + +#else /* ONIG_RUBY_M17N */ + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ + (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf) +#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ + (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end) +#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + (enc)->left_adjust_char_head(start, s) +#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \ + (enc)->get_all_pair_ambig_codes(ambig_flag,acs) +#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \ + (enc)->get_all_comp_ambig_codes(ambig_flag,acs) +#define ONIGENC_STEP_BACK(enc,start,s,n) \ + onigenc_step_back((enc),(start),(s),(n)) + +#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end)) +#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) + +#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ + 
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \ + (enc)->get_ctype_code_range(ctype,sbr,mbr) + +ONIG_EXTERN +OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n)); + +#endif /* is not ONIG_RUBY_M17N */ + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init P_((void)); +ONIG_EXTERN +int onigenc_set_default_encoding P_((OnigEncoding enc)); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding P_((void)); +ONIG_EXTERN +void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const 
OnigUChar* s, const OnigUChar** prev)); +ONIG_EXTERN +OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end)); +ONIG_EXTERN +int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); +ONIG_EXTERN +int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 +/* constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +typedef unsigned int OnigOptionType; + +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE + +/* options */ +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 1U +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) +#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) +/* options (search time) */ +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ + +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) +#define 
ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) + +/* syntax */ +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) + +/* default syntax */ +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +/* syntax (operators) */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? 
*/ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) 
*/ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ + +/* syntax (behavior) */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. 
*/ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 + +/* error codes */ +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) +/* normal return */ +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 + +/* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 +/* syntax error */ +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_ESCAPE -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define 
ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 +/* values error (syntax error) */ +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 +#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 +#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 + +/* errors related to thread */ +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) + +typedef struct 
OnigCaptureTreeNodeStruct { + int group; /* group number */ + int beg; + int end; + int allocated; + int num_childs; + struct OnigCaptureTreeNodeStruct** childs; +} OnigCaptureTreeNode; + +/* match result region type */ +struct re_registers { + int allocated; + int num_regs; + int* beg; + int* end; + /* extended */ + OnigCaptureTreeNode* history_root; /* capture history tree root */ +}; + +/* capture tree traverse */ +#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 +#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 +#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ + ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) + + +#define ONIG_REGION_NOTPOS -1 + +typedef struct re_registers OnigRegion; + +typedef struct { + OnigEncoding enc; + OnigUChar* par; + OnigUChar* par_end; +} OnigErrorInfo; + +typedef struct { + int lower; + int upper; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc) P_((const char* s)); +extern void onig_null_warn P_((const char* s)); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 + +/* regex_t state */ +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 + +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) + +typedef struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_comb_exp_check; /* combination explosion check */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) 
flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; + OnigAmbigType ambig_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + typedef OnigRegexType regex_t; +#endif + + +typedef struct { + int num_of_elements; + OnigEncoding pattern_enc; + OnigEncoding target_enc; + OnigSyntaxType* syntax; + OnigOptionType option; + OnigAmbigType ambig_flag; +} OnigCompileInfo; + +/* Oniguruma Native API */ +ONIG_EXTERN +int onig_init P_((void)); +ONIG_EXTERN +int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); +ONIG_EXTERN +void onig_set_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +void onig_set_verb_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int 
onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +void onig_free P_((OnigRegex)); +ONIG_EXTERN +int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +OnigRegion* onig_region_new P_((void)); +ONIG_EXTERN +void onig_region_init P_((OnigRegion* region)); +ONIG_EXTERN +void onig_region_free P_((OnigRegion* region, int free_self)); +ONIG_EXTERN +void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); +ONIG_EXTERN +void onig_region_clear P_((OnigRegion* region)); +ONIG_EXTERN +int onig_region_resize P_((OnigRegion* region, int n)); +ONIG_EXTERN +int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); +ONIG_EXTERN +int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); +ONIG_EXTERN +int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); +ONIG_EXTERN +int onig_number_of_names P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_captures P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_capture_histories P_((OnigRegex reg)); +ONIG_EXTERN +OnigCaptureTreeNode* 
onig_get_capture_tree P_((OnigRegion* region)); +ONIG_EXTERN +int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); +ONIG_EXTERN +int onig_noname_group_capture_is_active P_((OnigRegex reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((OnigRegex reg)); +ONIG_EXTERN +OnigOptionType onig_get_options P_((OnigRegex reg)); +ONIG_EXTERN +OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg)); +ONIG_EXTERN +OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); +ONIG_EXTERN +int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN +unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); +ONIG_EXTERN +void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); +ONIG_EXTERN +void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); +ONIG_EXTERN +void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); +ONIG_EXTERN +int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code)); +ONIG_EXTERN +void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); +ONIG_EXTERN +OnigAmbigType onig_get_default_ambig_flag P_((void)); +ONIG_EXTERN +int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag)); +ONIG_EXTERN +unsigned int onig_get_match_stack_limit_size P_((void)); +ONIG_EXTERN +int onig_set_match_stack_limit_size P_((unsigned int size)); +ONIG_EXTERN +int onig_end P_((void)); +ONIG_EXTERN +const char* onig_version P_((void)); +ONIG_EXTERN +const char* onig_copyright P_((void)); + +#ifdef 
__cplusplus +} +#endif + +#endif /* ONIGURUMA_H */ diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c new file mode 100644 index 0000000..6a0976d --- /dev/null +++ b/ext/mbstring/oniguruma/regcomp.c @@ -0,0 +1,6044 @@ +/********************************************************************** + regcomp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regparse.h" + +OnigAmbigType OnigDefaultAmbigFlag = + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); + +extern OnigAmbigType +onig_get_default_ambig_flag(void) +{ + return OnigDefaultAmbigFlag; +} + +extern int +onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +{ + OnigDefaultAmbigFlag = ambig_flag; + return 0; +} + + +static UChar* +k_strdup(UChar* s, UChar* end) +{ + int len = end - s; + + if (len > 0) { + UChar* r = (UChar* )xmalloc(len + 1); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, len); + r[len] = (UChar )0; + return r; + } + else return NULL; +} + +/* + Caution: node should not be a string node. + (s and end member address break) +*/ +static void +swap_node(Node* a, Node* b) +{ + Node c; + c = *a; *a = *b; *b = c; +} + +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) +{ + if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) + return ONIG_INFINITE_DISTANCE; + else { + if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; + else return ONIG_INFINITE_DISTANCE; + } +} + +static OnigDistance +distance_multiply(OnigDistance d, int m) +{ + if (m == 0) return 0; + + if (d < ONIG_INFINITE_DISTANCE / m) + return d * m; + else + return ONIG_INFINITE_DISTANCE; +} + +static int +bitset_is_empty(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + if (bs[i] != 0) return 0; + } + return 1; +} + +#ifdef ONIG_DEBUG +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} +#endif + +extern int +onig_bbuf_init(BBuf* buf, int size) +{ + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + + buf->alloc = size; + buf->used = 0; + return 0; +} + + +#ifdef USE_SUBEXP_CALL + +static int +unset_addr_list_init(UnsetAddrList* uslist, int size) +{ + UnsetAddr* p; + + p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); 
+ uslist->num = 0; + uslist->alloc = size; + uslist->us = p; + return 0; +} + +static void +unset_addr_list_end(UnsetAddrList* uslist) +{ + if (IS_NOT_NULL(uslist->us)) + xfree(uslist->us); +} + +static int +unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +{ + UnsetAddr* p; + int size; + + if (uslist->num >= uslist->alloc) { + size = uslist->alloc * 2; + p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + uslist->alloc = size; + uslist->us = p; + } + + uslist->us[uslist->num].offset = offset; + uslist->us[uslist->num].target = node; + uslist->num++; + return 0; +} +#endif /* USE_SUBEXP_CALL */ + + +static int +add_opcode(regex_t* reg, int opcode) +{ + BBUF_ADD1(reg, opcode); + return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +static int +add_state_check_num(regex_t* reg, int num) +{ + StateCheckNumType n = (StateCheckNumType )num; + + BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); + return 0; +} +#endif + +static int +add_rel_addr(regex_t* reg, int addr) +{ + RelAddrType ra = (RelAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_RELADDR); + return 0; +} + +static int +add_abs_addr(regex_t* reg, int addr) +{ + AbsAddrType ra = (AbsAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_ABSADDR); + return 0; +} + +static int +add_length(regex_t* reg, int len) +{ + LengthType l = (LengthType )len; + + BBUF_ADD(reg, &l, SIZE_LENGTH); + return 0; +} + +static int +add_mem_num(regex_t* reg, int num) +{ + MemNumType n = (MemNumType )num; + + BBUF_ADD(reg, &n, SIZE_MEMNUM); + return 0; +} + +static int +add_pointer(regex_t* reg, void* addr) +{ + PointerType ptr = (PointerType )addr; + + BBUF_ADD(reg, &ptr, SIZE_POINTER); + return 0; +} + +static int +add_option(regex_t* reg, OnigOptionType option) +{ + BBUF_ADD(reg, &option, SIZE_OPTION); + return 0; +} + +static int +add_opcode_rel_addr(regex_t* reg, int opcode, int addr) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = 
add_rel_addr(reg, addr); + return r; +} + +static int +add_bytes(regex_t* reg, UChar* bytes, int len) +{ + BBUF_ADD(reg, bytes, len); + return 0; +} + +static int +add_bitset(regex_t* reg, BitSetRef bs) +{ + BBUF_ADD(reg, bs, SIZE_BITSET); + return 0; +} + +static int +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_option(reg, option); + return r; +} + +static int compile_length_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg); + + +#define IS_NEED_STR_LEN_OP_EXACT(op) \ + ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ + (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) + +static int +select_str_opcode(int mb_len, int str_len, int ignore_case) +{ + int op; + + if (ignore_case) { + switch (str_len) { + case 1: op = OP_EXACT1_IC; break; + default: op = OP_EXACTN_IC; break; + } + } + else { + switch (mb_len) { + case 1: + switch (str_len) { + case 1: op = OP_EXACT1; break; + case 2: op = OP_EXACT2; break; + case 3: op = OP_EXACT3; break; + case 4: op = OP_EXACT4; break; + case 5: op = OP_EXACT5; break; + default: op = OP_EXACTN; break; + } + break; + + case 2: + switch (str_len) { + case 1: op = OP_EXACTMB2N1; break; + case 2: op = OP_EXACTMB2N2; break; + case 3: op = OP_EXACTMB2N3; break; + default: op = OP_EXACTMB2N; break; + } + break; + + case 3: + op = OP_EXACTMB3N; + break; + + default: + op = OP_EXACTMBN; + break; + } + } + return op; +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +{ + int r; + int saved_num_null_check = reg->num_null_check; + + if (empty_info != 0) { + r = add_opcode(reg, OP_NULL_CHECK_START); + if (r) return r; + r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ + if (r) return r; + reg->num_null_check++; + } + + r = compile_tree(node, reg); + if (r) return r; + + if (empty_info != 0) { + if (empty_info == NQ_TARGET_IS_EMPTY) + r = add_opcode(reg, 
OP_NULL_CHECK_END); + else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); + else if (empty_info == NQ_TARGET_IS_EMPTY_REC) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + + if (r) return r; + r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ + } + return r; +} + +#ifdef USE_SUBEXP_CALL +static int +compile_call(CallNode* node, regex_t* reg) +{ + int r; + + r = add_opcode(reg, OP_CALL); + if (r) return r; + r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), + node->target); + if (r) return r; + r = add_abs_addr(reg, 0 /*dummy addr.*/); + return r; +} +#endif + +static int +compile_tree_n_times(Node* node, int n, regex_t* reg) +{ + int i, r; + + for (i = 0; i < n; i++) { + r = compile_tree(node, reg); + if (r) return r; + } + return 0; +} + +static int +add_compile_string_length(UChar* s, int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int len; + int op = select_str_opcode(mb_len, str_len, ignore_case); + + len = SIZE_OPCODE; + + if (op == OP_EXACTMBN) len += SIZE_LENGTH; + if (IS_NEED_STR_LEN_OP_EXACT(op)) + len += SIZE_LENGTH; + + len += mb_len * str_len; + return len; +} + +static int +add_compile_string(UChar* s, int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int op = select_str_opcode(mb_len, str_len, ignore_case); + add_opcode(reg, op); + + if (op == OP_EXACTMBN) + add_length(reg, mb_len); + + if (IS_NEED_STR_LEN_OP_EXACT(op)) { + if (op == OP_EXACTN_IC) + add_length(reg, mb_len * str_len); + else + add_length(reg, str_len); + } + + add_bytes(reg, s, mb_len * str_len); + return 0; +} + + +static int +compile_length_string_node(Node* node, regex_t* reg) +{ + int rlen, r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev; + StrNode* sn; + + sn = &(NSTRING(node)); + if (sn->end <= sn->s) + return 0; + + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enc_len(enc, p); + p += prev_len; + slen = 1; + rlen = 
0; + + for (; p < sn->end; ) { + len = enc_len(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + prev = p; + slen = 1; + prev_len = len; + } + p += len; + } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + return rlen; +} + +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ + int r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev, *end; + StrNode* sn; + + sn = &(NSTRING(node)); + if (sn->end <= sn->s) + return 0; + + end = sn->end; + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enc_len(enc, p); + p += prev_len; + slen = 1; + + for (; p < end; ) { + len = enc_len(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string(prev, prev_len, slen, reg, ambig); + if (r) return r; + + prev = p; + slen = 1; + prev_len = len; + } + + p += len; + } + return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + add_length(reg, mbuf->used); + return add_bytes(reg, mbuf->p, mbuf->used); +#else + static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; + + int r, pad_size; + UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + + GET_ALIGNMENT_PAD_SIZE(p, pad_size); + add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + + r = add_bytes(reg, mbuf->p, mbuf->used); + + /* padding for return value from compile_length_cclass_node() to be fix. 
*/ + pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + return r; +#endif +} + +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ + int len; + + if (IS_CCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + + if (IS_NULL(cc->mbuf)) { + len = SIZE_OPCODE + SIZE_BITSET; + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + len = SIZE_OPCODE; + } + else { + len = SIZE_OPCODE + SIZE_BITSET; + } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + len += SIZE_LENGTH + cc->mbuf->used; +#else + len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif + } + + return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ + int r; + + if (IS_CCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + + if (IS_NULL(cc->mbuf)) { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); + + r = add_bitset(reg, cc->bs); + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); + + r = add_multi_byte_cclass(cc->mbuf, reg); + } + else { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else + add_opcode(reg, OP_CCLASS_MIX); + + r = add_bitset(reg, cc->bs); + if (r) return r; + r = add_multi_byte_cclass(cc->mbuf, reg); + } + } + + return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC 4 + + OnigRepeatRange* p; + + if (reg->repeat_range_alloc == 0) { + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + reg->repeat_range = p; + reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; + } + else if (reg->repeat_range_alloc <= id) { + int n; + n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 
/*
 * Emit a counted repeat around qn->target:
 *
 *   OP_REPEAT / OP_REPEAT_NG   <repeat id> <rel. addr>
 *   <target, optionally wrapped in a null check>
 *   OP_REPEAT_INC*             <repeat id>
 *
 * target_len is the size the caller computed for the (possibly wrapped)
 * target; empty_info is passed through to compile_tree_empty_check().
 * The repeat id is the value of reg->num_repeat on entry, and the bounds
 * are recorded under that id with entry_repeat_range().
 *
 * Returns 0 on success or the first non-zero status from a helper.
 */
static int
compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
                          regex_t* reg)
{
  int r;
  int num_repeat = reg->num_repeat;

  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
  if (r) return r;
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  /* the id counter advances even when the write above failed */
  reg->num_repeat++;
  if (r) return r;
  /* relative address spans the target plus the closing REPEAT_INC */
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
  if (r) return r;

  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
  if (r) return r;

  r = compile_tree_empty_check(qn->target, reg, empty_info);
  if (r) return r;

  /* the *_SG opcodes are chosen when the pattern contains subexp calls
     (if compiled in) or this quantifier is flagged as inside a repeat */
  if (
#ifdef USE_SUBEXP_CALL
      reg->num_call > 0 ||
#endif
      IS_QUANTIFIER_IN_REPEAT(qn)) {
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
  }
  else {
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
  }
  if (r) return r;
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  return r;
}
SIZE_STATE_CHECK_NUM: 0); + + /* anychar repeat */ + if (NTYPE(qn->target) == N_ANYCHAR) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) + len = SIZE_OP_JUMP; + else + len = 0; + + len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; + } + else { + if (qn->lower == 0) + len = SIZE_OP_JUMP; + else + len = 0; + + len += mod_tlen + SIZE_OP_PUSH + cklen; + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) /* /(?<n>..){0}/ */ + len = SIZE_OP_JUMP + tlen; + else + len = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + len = SIZE_OP_STATE_CHECK_PUSH + tlen; + } + else { + len = SIZE_OP_PUSH + tlen; + } + } + else { + len = tlen; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + if (CKN_ON) + len += SIZE_OP_STATE_CHECK; + } + + return len; +} + +static int +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +{ + int r, mod_tlen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? 
qn->comb_exp_check_num : 0); + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + if (CKN_ON) { + r = add_state_check_num(reg, ckn); + if (r) return r; + } + + return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + } + else { + if (IS_MULTILINE(reg->options)) { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); + } + else { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); + } + if (r) return r; + if (CKN_ON) + r = add_state_check_num(reg, ckn); + + return r; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) { + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; + } + + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + } + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + } + else { + if (qn->lower == 0) { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + } + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + } + else + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else + r = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + } + if (r) return r; + } + + r = compile_tree(qn->target, reg); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + } + + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + if (CKN_ON) { + if (r) return r; + r = add_opcode(reg, OP_STATE_CHECK); + if (r) return r; + r = add_state_check_num(reg, ckn); + } + } + return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +{ + int len, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + /* anychar repeat */ + if (NTYPE(qn->target) == N_ANYCHAR) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact)) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + len = SIZE_OP_JUMP; + } + else { + len = tlen * qn->lower; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + else if (IS_NOT_NULL(qn->next_head_exact)) + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + else + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + } + else + len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ + len = SIZE_OP_JUMP + tlen; + } + else if 
(!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + len = tlen * qn->lower; + len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + } + + return len; +} + +static int +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +{ + int i, r, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + } + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + } + if (r) return r; + } + else { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) 
return r; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTRING(qn->head_exact).s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + } + else if (IS_NOT_NULL(qn->next_head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + } + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn->upper - qn->lower; + + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + for (i = 0; i < n; i++) { + r = add_opcode_rel_addr(reg, OP_PUSH, + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + } + } + else if (!qn->greedy && qn->upper == 1 
&& qn->lower == 0) { /* '??' */ + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + } + return r; +} +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_option_node(EffectNode* node, regex_t* reg) +{ + int tlen; + OnigOptionType prev = reg->options; + + reg->options = node->option; + tlen = compile_length_tree(node->target, reg); + reg->options = prev; + + if (tlen < 0) return tlen; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; +} + +static int +compile_option_node(EffectNode* node, regex_t* reg) +{ + int r; + OnigOptionType prev = reg->options; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; + } + + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + return r; +} + +static int +compile_length_effect_node(EffectNode* node, regex_t* reg) +{ + int len; + int tlen; + + if (node->type == EFFECT_OPTION) + return compile_length_option_node(node, reg); + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + else + tlen = 0; + + switch (node->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + len = SIZE_OP_MEMORY_START_PUSH + tlen + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_EFFECT_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_EFFECT_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + len = SIZE_OP_MEMORY_START_PUSH; + else + len = SIZE_OP_MEMORY_START; + + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + } + break; + + case EFFECT_STOP_BACKTRACK: + if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { + QuantifierNode* qn = &NQUANTIFIER(node->target); + tlen = compile_length_tree(qn->target, reg); + if (tlen < 0) return tlen; + + len = tlen * qn->lower + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + } + else { + len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return len; +} + +static int get_char_length_tree(Node* node, regex_t* reg, int* len); + +static int +compile_effect_node(EffectNode* node, regex_t* reg) +{ + int r, len; + + if (node->type == EFFECT_OPTION) + return compile_option_node(node, reg); + + switch (node->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r) return r; + node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + node->state |= NST_ADDR_FIXED; + r = add_abs_addr(reg, (int )node->call_addr); + if (r) return r; + len = compile_length_tree(node->target, reg); + len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_EFFECT_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_EFFECT_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) return r; + } +#endif + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + r = add_opcode(reg, OP_MEMORY_START_PUSH); + else + r = add_opcode(reg, OP_MEMORY_START); + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + else + r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); + + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = add_opcode(reg, OP_RETURN); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH); + else + r = add_opcode(reg, OP_MEMORY_END); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } + break; + + case EFFECT_STOP_BACKTRACK: + if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { + QuantifierNode* qn = &NQUANTIFIER(node->target); + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + len = compile_length_tree(qn->target, reg); + if (len < 0) return len; + + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + } + else { + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_anchor_node(AnchorNode* node, regex_t* reg) +{ + int len; + int 
tlen = 0; + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + + switch (node->type) { + case ANCHOR_PREC_READ: + len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + break; + case ANCHOR_PREC_READ_NOT: + len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + break; + case ANCHOR_LOOK_BEHIND: + len = SIZE_OP_LOOK_BEHIND + tlen; + break; + case ANCHOR_LOOK_BEHIND_NOT: + len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = SIZE_OPCODE; + break; + } + + return len; +} + +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ + int r, len; + + switch (node->type) { + case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; + case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; + case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; +#endif + + case ANCHOR_PREC_READ: + r = add_opcode(reg, OP_PUSH_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_POS); + break; + + case ANCHOR_PREC_READ_NOT: + len = compile_length_tree(node->target, reg); + if (len < 0) return len; + r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_POS); + break; + + case ANCHOR_LOOK_BEHIND: + { + int n; + r = add_opcode(reg, OP_LOOK_BEHIND); + if (r) return 
r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + int n; + len = compile_length_tree(node->target, reg); + r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + if (r) return r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_tree(Node* node, regex_t* reg) +{ + int len, type, r; + + type = NTYPE(node); + switch (type) { + case N_LIST: + len = 0; + do { + r = compile_length_tree(NCONS(node).left, reg); + if (r < 0) return r; + len += r; + } while (IS_NOT_NULL(node = NCONS(node).right)); + r = len; + break; + + case N_ALT: + { + int n; + + n = r = 0; + do { + r += compile_length_tree(NCONS(node).left, reg); + n++; + } while (IS_NOT_NULL(node = NCONS(node).right)); + r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); + } + break; + + case N_STRING: + if (NSTRING_IS_RAW(node)) + r = compile_length_string_raw_node(&(NSTRING(node)), reg); + else + r = compile_length_string_node(node, reg); + break; + + case N_CCLASS: + r = compile_length_cclass_node(&(NCCLASS(node)), reg); + break; + + case N_CTYPE: + case N_ANYCHAR: + r = SIZE_OPCODE; + break; + + case N_BACKREF: + { + BackrefNode* br = &(NBACKREF(node)); + +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r 
= ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) + ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + r = SIZE_OP_CALL; + break; +#endif + + case N_QUANTIFIER: + r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); + break; + + case N_EFFECT: + r = compile_length_effect_node(&NEFFECT(node), reg); + break; + + case N_ANCHOR: + r = compile_length_anchor_node(&(NANCHOR(node)), reg); + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_tree(Node* node, regex_t* reg) +{ + int n, type, len, pos, r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + do { + r = compile_tree(NCONS(node).left, reg); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + Node* x = node; + len = 0; + do { + len += compile_length_tree(NCONS(x).left, reg); + if (NCONS(x).right != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } + } while (IS_NOT_NULL(x = NCONS(x).right)); + pos = reg->used + len; /* goal position */ + + do { + len = compile_length_tree(NCONS(node).left, reg); + if (IS_NOT_NULL(NCONS(node).right)) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCONS(node).left, reg); + if (r) break; + if (IS_NOT_NULL(NCONS(node).right)) { + len = pos - (reg->used + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_STRING: + if (NSTRING_IS_RAW(node)) + r = compile_string_raw_node(&(NSTRING(node)), reg); + else + r = compile_string_node(node, reg); + break; + + case N_CCLASS: + r = compile_cclass_node(&(NCCLASS(node)), reg); + break; + + case N_CTYPE: + { + int op; + + switch (NCTYPE(node).type) { + case CTYPE_WORD: op = OP_WORD; break; + case CTYPE_NOT_WORD: op = OP_NOT_WORD; break; + 
default: + return ONIGERR_TYPE_BUG; + break; + } + r = add_opcode(reg, op); + } + break; + + case N_ANYCHAR: + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + + case N_BACKREF: + { + BackrefNode* br = &(NBACKREF(node)); + +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + /* groups 1 and 2 have dedicated opcodes without a memory-number operand */ + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } + } + else { + int i; + int* p; + + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } + if (r) return r; + +#ifdef USE_BACKREF_AT_LEVEL + add_bacref_mems: +#endif + r = add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + /* the group numbers are emitted from the last entry to the first */ + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r) return r; + } + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + r = compile_call(&(NCALL(node)), reg); + break; +#endif + + case N_QUANTIFIER: + r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); + break; + + case N_EFFECT: + r = compile_effect_node(&NEFFECT(node), reg); + break; + + case N_ANCHOR: + r = compile_anchor_node(&(NANCHOR(node)), reg); + break; + + default: + /* NOTE(review): an unknown node type is only reported in ONIG_DEBUG builds; r stays 0, unlike compile_length_tree() which returns ONIGERR_TYPE_BUG */ +#ifdef ONIG_DEBUG + fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif + break; + } + + return r; +} + +#ifdef USE_NAMED_GROUP + +/* Walks the tree through *plink: an unnamed capture group is replaced by its target and freed, a named group gets the next consecutive number (stored in map[old].new_val and in the node). *counter holds the last number handed out. */ +static int
+noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = noname_disable_map(&(NCONS(node).left), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUANTIFIER: + { + Node** ptarget = &(NQUANTIFIER(node).target); + Node* old = *ptarget; + r = noname_disable_map(ptarget, map, counter); + /* the target was replaced and is now itself a quantifier: merge the two nested quantifiers */ + if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + onig_reduce_nested_quantifier(node, *ptarget); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + if (en->type == EFFECT_MEMORY) { + if (IS_EFFECT_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { + /* unnamed group: splice its target into the parent link, free the group node, then continue with the spliced-in node */ + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + default: + break; + } + + return r; +} + +/* Rewrites the group numbers of a name-based back reference through map, dropping entries whose group has no new number (new_val <= 0) and shrinking back_num accordingly. A back reference that is not name-based is rejected with ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED. */ +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ + int i, pos, n, old_num; + int *backs; + BackrefNode* bn = &(NBACKREF(node)); + + if (!
IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + /* compact the array in place: pos only advances for entries that are kept */ + for (i = 0, pos = 0; i < old_num; i++) { + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} + +/* Applies renumber_node_backref() to every back reference reachable through list, alternative, quantifier and effect nodes. Returns 0, or the first error encountered. */ +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = renumber_by_map(NCONS(node).left, map); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case N_QUANTIFIER: + r = renumber_by_map(NQUANTIFIER(node).target, map); + break; + case N_EFFECT: + r = renumber_by_map(NEFFECT(node).target, map); + break; + + case N_BACKREF: + r = renumber_node_backref(node, map); + break; + + default: + break; + } + + return r; +} + +/* Returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED if a back reference that is not name-based is reachable through list, alternative, quantifier or effect nodes; otherwise 0. */ +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = numbered_ref_check(NCONS(node).left); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case N_QUANTIFIER: + r = numbered_ref_check(NQUANTIFIER(node).target); + break; + case N_EFFECT: + r = numbered_ref_check(NEFFECT(node).target); + break; + + case N_BACKREF: + if (!
IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} + +/* Removes unnamed capture groups from the tree and renumbers what refers to the remaining (named) groups: the group nodes, back references, the memory-node table, the capture-history bits and finally the name table. Returns 0 or an error code. */ +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + GroupNumRemap* map; + + /* map is indexed by the old group number, 1..num_mem */ + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + for (i = 1; i <= env->num_mem; i++) { + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + /* compact the memory-node table so that surviving groups are contiguous from index 1 */ + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + /* rebuild the capture-history bit set under the new numbering */ + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + + return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ + +#ifdef USE_SUBEXP_CALL +/* For every entry of uslist, writes the call address of the target group into the compiled buffer at the recorded offset. Returns ONIGERR_PARSER_BUG if a target's address is not fixed yet. */ +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ + int i, offset; + EffectNode* en; + AbsAddrType addr; + + for (i = 0; i < uslist->num; i++) { + en = &(NEFFECT(uslist->us[i].target)); + if (!
IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->call_addr; + offset = uslist->us[i].offset; + + BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); + } + return 0; +} +#endif + +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +/* Reports whether the tree contains a capture group (NQ_TARGET_IS_EMPTY_MEM) or a recursive call (NQ_TARGET_IS_EMPTY_REC). List and alternative nodes return the largest value of their children; 0 means neither was found. Targets of quantifiers with upper == 0 are not inspected. */ +static int +quantifiers_memory_node_info(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + { + int v; + do { + v = quantifiers_memory_node_info(NCONS(node).left); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = quantifiers_memory_node_info(NCALL(node).target); + break; +#endif + + case N_QUANTIFIER: + { + QuantifierNode* qn = &(NQUANTIFIER(node)); + if (qn->upper != 0) { + r = quantifiers_memory_node_info(qn->target); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = quantifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case N_BACKREF: + case N_STRING: + case N_CTYPE: + case N_CCLASS: + case N_ANYCHAR: + case N_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ + +/* Stores in *min the minimum match length of the tree; node types that are not handled (e.g. anchors) leave it at 0. Returns 0, or an error code such as ONIGERR_INVALID_BACKREF. */ +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; + int r = 0; + + *min = 0; + switch (NTYPE(node)) { + case N_BACKREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + if (br->state & NST_RECURSION) break; + + backs = BACKREFS_P(br); + if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[0]], min, env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return
ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + /* a recursive call only uses a minimum that is already cached on the target group; otherwise *min stays 0 */ + if (IS_CALL_RECURSION(&NCALL(node))) { + EffectNode* en = &(NEFFECT(NCALL(node).target)); + if (IS_EFFECT_MIN_FIXED(en)) + *min = en->min_len; + } + else + r = get_min_match_length(NCALL(node).target, min, env); + break; +#endif + + case N_LIST: + do { + r = get_min_match_length(NCONS(node).left, &tmin, env); + if (r == 0) *min += tmin; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + Node *x, *y; + y = node; + /* the first alternative seeds *min, later ones can only lower it */ + do { + x = NCONS(y).left; + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; + else if (*min > tmin) *min = tmin; + } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right)); + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + *min = sn->end - sn->s; + } + break; + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: *min = 1; break; + case CTYPE_NOT_WORD: *min = 1; break; + default: + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *min = 1; + break; + + case N_QUANTIFIER: + { + QuantifierNode* qn = &(NQUANTIFIER(node)); + + if (qn->lower > 0) { + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + /* cache the result on the group node so it is computed only once */ + if (IS_EFFECT_MIN_FIXED(en)) + *min = en->min_len; + else { + r = get_min_match_length(en->target, min, env); + if (r == 0) { + en->min_len = *min; + SET_EFFECT_STATUS(node, NST_MIN_FIXED); + } + } + break; +#endif + /* without USE_SUBEXP_CALL, EFFECT_MEMORY falls through to the plain target computation below */ + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_min_match_length(en->target, min, env); + break; + } + } + break; + + case N_ANCHOR: + default: + break; + } + + return r; +} + +/* Stores in *max an upper bound on the match length of the tree, ONIG_INFINITE_DISTANCE when a recursive call/back reference or an infinite repeat is involved. Returns 0 or an error code. */ +static int +get_max_match_length(Node* node,
OnigDistance *max, ScanEnv* env) +{ + OnigDistance tmax; + int r = 0; + + *max = 0; + switch (NTYPE(node)) { + case N_LIST: + /* a list is bounded by the sum of its elements */ + do { + r = get_max_match_length(NCONS(node).left, &tmax, env); + if (r == 0) + *max = distance_add(*max, tmax); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + /* an alternation is bounded by its longest alternative */ + do { + r = get_max_match_length(NCONS(node).left, &tmax, env); + if (r == 0 && *max < tmax) *max = tmax; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + *max = sn->end - sn->s; + } + break; + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + default: + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case N_BACKREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + if (br->state & NST_RECURSION) { + *max = ONIG_INFINITE_DISTANCE; + break; + } + backs = BACKREFS_P(br); + /* take the largest bound over all groups the back reference may refer to */ + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (! IS_CALL_RECURSION(&(NCALL(node)))) + r = get_max_match_length(NCALL(node).target, max, env); + else + *max = ONIG_INFINITE_DISTANCE; + break; +#endif + + case N_QUANTIFIER: + { + QuantifierNode* qn = &(NQUANTIFIER(node)); + + if (qn->upper != 0) { + r = get_max_match_length(qn->target, max, env); + /* a target with bound 0 stays 0 regardless of the repeat count */ + if (r == 0 && *max != 0) { + if (!
IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + /* cache the result on the group node so it is computed only once */ + if (IS_EFFECT_MAX_FIXED(en)) + *max = en->max_len; + else { + r = get_max_match_length(en->target, max, env); + if (r == 0) { + en->max_len = *max; + SET_EFFECT_STATUS(node, NST_MAX_FIXED); + } + } + break; +#endif + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_max_match_length(en->target, max, env); + break; + } + } + break; + + case N_ANCHOR: + default: + break; + } + + return r; +} + +/* Results of get_char_length_tree1() for patterns without a fixed character length. TOP_ALT_VARLEN is returned when the differing lengths belong to an alternation at nesting level 1, VARLEN in every other variable-length case. */ +#define GET_CHAR_LEN_VARLEN -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 + +/* fixed size pattern node only */ +/* Stores in *len the number of characters the tree matches. Returns 0 on success or one of the GET_CHAR_LEN_* codes; level is the nesting depth of the caller and is incremented on entry. */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ + int tlen; + int r = 0; + + level++; + *len = 0; + switch (NTYPE(node)) { + case N_LIST: + do { + r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + if (r == 0) + *len = distance_add(*len, tlen); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + int tlen2; + int varlen = 0; + + /* every alternative must have the same length as the first one */ + r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) { + r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } + } + if (r == 0) { + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; + } + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + UChar *s = sn->s; + /* count characters, not bytes: advance by the encoded length of each character */ + while (s < sn->end) { + s += enc_len(reg->enc, s); + (*len)++; + } + } + break; + + case N_QUANTIFIER: + { + QuantifierNode* qn = &(NQUANTIFIER(node)); + if (qn->lower == qn->upper) { + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen,
qn->lower); + } + else + r = GET_CHAR_LEN_VARLEN; + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (! IS_CALL_RECURSION(&(NCALL(node)))) + r = get_char_length_tree1(NCALL(node).target, reg, len, level); + else + r = GET_CHAR_LEN_VARLEN; + break; +#endif + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *len = 1; + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *len = 1; + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + /* cache the result on the group node so it is computed only once */ + if (IS_EFFECT_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_EFFECT_STATUS(node, NST_CLEN_FIXED); + } + } + break; +#endif + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_char_length_tree1(en->target, reg, len, level); + break; + default: + break; + } + } + break; + + case N_ANCHOR: + break; + + default: + r = GET_CHAR_LEN_VARLEN; + break; + } + + return r; +} + +/* Entry point for get_char_length_tree1(): computes the fixed character length of the tree starting at nesting level 0. */ +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ + return get_char_length_tree1(node, reg, len, 0); +} + +/* x is not included y ==> 1 : 0 */ +/* Only character-type, character-class and string nodes are compared; every other combination returns 0. Operand pairs that have no direct case are swapped and retried. */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ + int i, len; + OnigCodePoint code; + UChar *p, c; + int ytype; + + retry: + ytype = NTYPE(y); + switch (NTYPE(x)) { + case N_CTYPE: + { + switch (ytype) { + case N_CTYPE: + switch (NCTYPE(x).type) { + case CTYPE_WORD: + if (NCTYPE(y).type == CTYPE_NOT_WORD) + return 1; + else + return 0; + break; + case CTYPE_NOT_WORD: + if (NCTYPE(y).type == CTYPE_WORD) + return 1; + else + return 0; + break; + default: + break; + } + break; + + case N_CCLASS: + swap: + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; + + case N_STRING: + goto swap; + break; + + default: + break; + } + } + break; + + case N_CCLASS: + { + CClassNode* xc = &(NCCLASS(x)); + switch (ytype) { + case N_CTYPE: + switch
(NCTYPE(y).type) { + case CTYPE_WORD: + /* only decided for a non-negated class without a multi-byte buffer: disjoint if no single-byte member is a word character */ + if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + } + return 1; + } + return 0; + break; + case CTYPE_NOT_WORD: + /* overlap as soon as the class accepts any single-byte non-word character */ + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_CCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + break; + + default: + break; + } + break; + + case N_CCLASS: + { + int v; + CClassNode* yc = &(NCCLASS(y)); + + /* a single-byte value accepted by both classes (taking negation into account) means they overlap */ + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_CCLASS_NOT(xc)) || + (v == 0 && IS_CCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_CCLASS_NOT(yc)) || + (v == 0 && IS_CCLASS_NOT(yc))) + return 0; + } + } + /* disjoint only if at least one class is non-negated and has no multi-byte buffer */ + if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) + return 1; + return 0; + } + break; + + case N_STRING: + goto swap; + break; + + default: + break; + } + } + break; + + case N_STRING: + { + StrNode* xs = &(NSTRING(x)); + if (NSTRING_LEN(x) == 0) + break; + + c = *(xs->s); + switch (ytype) { + case N_CTYPE: + switch (NCTYPE(y).type) { + case CTYPE_WORD: + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1); + break; + case CTYPE_NOT_WORD: + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0); + break; + default: + break; + } + break; + + case N_CCLASS: + { + CClassNode* cc = &(NCCLASS(y)); + + /* NOTE(review): the end pointer passed here is s + max character length, not xs->end - confirm the string buffer is always long enough */ + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ?
0 : 1); + } + break; + + case N_STRING: + { + UChar *q; + StrNode* ys = &(NSTRING(y)); + /* compare only the common prefix of the two strings, byte by byte */ + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; + + default: + break; + } + } + break; + + default: + break; + } + + return 0; +} + +/* Returns the node that supplies the first value matched by the tree, or NULL_NODE when none is identified (back references, alternations, any-char and calls always give NULL_NODE). With exact != 0, character-type and character-class nodes are not returned, and neither are non-raw strings while the ignore-case option is on. */ +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ + Node* n = NULL_NODE; + + switch (NTYPE(node)) { + case N_BACKREF: + case N_ALT: + case N_ANYCHAR: +#ifdef USE_SUBEXP_CALL + case N_CALL: +#endif + break; + + case N_CTYPE: + case N_CCLASS: + if (exact == 0) { + n = node; + } + break; + + case N_LIST: + n = get_head_value_node(NCONS(node).left, exact, reg); + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + + if (sn->end <= sn->s) + break; + + if (exact != 0 && + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { +#if 0 + UChar* tmp = sn->s; + if (!
ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, + &tmp, sn->end)) + n = node; +#endif + } + else { + n = node; + } + } + break; + + case N_QUANTIFIER: + { + QuantifierNode* qn = &(NQUANTIFIER(node)); + /* only a target that must match at least once can supply the head value */ + if (qn->lower > 0) { + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else + n = get_head_value_node(qn->target, exact, reg); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_OPTION: + { + /* evaluate the target under the option group's own options, then restore the previous ones */ + OnigOptionType options = reg->options; + + reg->options = NEFFECT(node).option; + n = get_head_value_node(NEFFECT(node).target, exact, reg); + reg->options = options; + } + break; + + case EFFECT_MEMORY: + case EFFECT_STOP_BACKTRACK: + n = get_head_value_node(en->target, exact, reg); + break; + } + } + break; + + case N_ANCHOR: + if (NANCHOR(node).type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node).target, exact, reg); + break; + + default: + break; + } + + return n; +} + +/* Returns 0 if every node type, effect type and anchor type found in the tree has its bit set in the corresponding mask, and 1 as soon as one does not. */ +static int +check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) +{ + int type, r = 0; + + type = NTYPE(node); + if ((type & type_mask) == 0) + return 1; + + switch (type) { + case N_LIST: + case N_ALT: + do { + r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUANTIFIER: + r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, + anchor_mask); + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + if ((en->type & effect_mask) == 0) + return 1; + + r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask); + } + break; + + case N_ANCHOR: + type = NANCHOR(node).type; + if ((type & anchor_mask) == 0) + return 1; + + if (NANCHOR(node).target) + r = check_type_tree(NANCHOR(node).target, + type_mask, effect_mask, anchor_mask); + break; + + default: + break; + } + return r; +} + +#ifdef USE_SUBEXP_CALL + +/* result values of subexp_inf_recursive_check() */ +#define RECURSION_EXIST 1 +#define RECURSION_INFINITE
2 + +/* Checks whether the sub-tree reaches the group currently marked NST_MARK1. Returns RECURSION_INFINITE if it is reached while head is still non-zero, RECURSION_EXIST if it is reached after head was cleared, 0 if it is not reached, or a negative error code. In a list, head is cleared once an element with a non-zero minimum match length has been passed. */ +static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node *x; + OnigDistance min; + int ret; + + x = node; + do { + ret = subexp_inf_recursive_check(NCONS(x).left, env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCONS(x).left, &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } + } while (IS_NOT_NULL(x = NCONS(x).right)); + } + break; + + case N_ALT: + { + int ret; + /* the result stays RECURSION_EXIST only if every alternative reports it */ + r = RECURSION_EXIST; + do { + ret = subexp_inf_recursive_check(NCONS(node).left, env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_QUANTIFIER: + r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); + /* a target that may repeat zero times does not count as recursion */ + if (r == RECURSION_EXIST) { + if (NQUANTIFIER(node).lower == 0) r = 0; + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check(an->target, env, head); + break; + } + } + break; + + case N_CALL: + r = subexp_inf_recursive_check(NCALL(node).target, env, head); + break; + + case N_EFFECT: + /* NST_MARK2 marks groups already being visited, so they are not entered twice */ + if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + return 0; + else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + return (head == 0 ?
RECURSION_EXIST : RECURSION_INFINITE); + else { + SET_EFFECT_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NEFFECT(node).target, env, head); + CLEAR_EFFECT_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + +/* Walks the tree and runs subexp_inf_recursive_check() on the target of every group flagged as recursive, with the group marked NST_MARK1 during the check. Returns ONIGERR_NEVER_ENDING_RECURSION when a check returns a positive value, otherwise 0 or an error code. */ +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + do { + r = subexp_inf_recursive_check_trav(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUANTIFIER: + r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check_trav(an->target, env); + break; + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + if (IS_EFFECT_RECURSION(en)) { + SET_EFFECT_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_EFFECT_STATUS(node, NST_MARK1); + } + r = subexp_inf_recursive_check_trav(en->target, env); + } + + break; + + default: + break; + } + + return r; +} + +/* Returns non-zero if the sub-tree reaches the group currently marked NST_MARK1. Call nodes whose target reports this are flagged with SET_CALL_RECURSION. */ +static int +subexp_recursive_check(Node* node) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + do { + r |= subexp_recursive_check(NCONS(node).left); + } while (IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUANTIFIER: + r = subexp_recursive_check(NQUANTIFIER(node).target); + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check(an->target); + break; + } + } + break; + + case N_CALL: + r =
subexp_recursive_check(NCALL(node).target); + if (r != 0) SET_CALL_RECURSION(node); + break; + + case N_EFFECT: + /* NST_MARK2 marks groups already being visited, so they are not entered twice */ + if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + return 0; + else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + return 1; /* recursion */ + else { + SET_EFFECT_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NEFFECT(node).target); + CLEAR_EFFECT_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + + +/* Walks the tree and, for every called group not yet flagged as recursive, runs subexp_recursive_check() on its target and sets NST_RECURSION when the group reaches itself. Returns FOUND_CALLED_NODE if the tree contains a called group, 0 if not, or a negative error code. */ +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE 1 + + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + { + int ret; + do { + ret = subexp_recursive_check_trav(NCONS(node).left, env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_QUANTIFIER: + r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); + /* remember that the target of a {0} quantifier still contains a called group */ + if (NQUANTIFIER(node).upper == 0) { + if (r == FOUND_CALLED_NODE) + NQUANTIFIER(node).is_refered = 1; + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check_trav(an->target, env); + break; + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + if (!
IS_EFFECT_RECURSION(en)) { + if (IS_EFFECT_CALLED(en)) { + SET_EFFECT_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION); + CLEAR_EFFECT_STATUS(node, NST_MARK1); + } + } + r = subexp_recursive_check_trav(en->target, env); + if (IS_EFFECT_CALLED(en)) + r |= FOUND_CALLED_NODE; + } + break; + + default: + break; + } + + return r; +} + +/* Resolves every call node in the tree to its target group: first by name, then (when no name matches) by parsing the name as a group number. The target is flagged NST_CALLED. Returns 0, or an error code after recording the offending name in env. */ +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + do { + r = setup_subexp_call(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + do { + r = setup_subexp_call(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUANTIFIER: + r = setup_subexp_call(NQUANTIFIER(node).target, env); + break; + case N_EFFECT: + r = setup_subexp_call(NEFFECT(node).target, env); + break; + + case N_CALL: + { + int n, num, *refs; + UChar *p; + CallNode* cn = &(NCALL(node)); + Node** nodes = SCANENV_MEM_NODES(env); + +#ifdef USE_NAMED_GROUP + n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); +#else + n = -1; +#endif + if (n <= 0) { + /* name not found, check group number.
(?*ddd) */ + p = cn->name; + num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); + /* the whole name must be a positive number */ + if (num <= 0 || p != cn->name_end) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } +#ifdef USE_NAMED_GROUP + /* numbered calls are rejected when only named groups capture */ + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (num > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + cn->ref_num = num; + goto set_call_attr; + } + else if (n > 1) { + /* a name defined by more than one group cannot be called */ + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->ref_num = refs[0]; + set_call_attr: + cn->target = nodes[cn->ref_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + /* flag the target group as called and set its bit in bt_mem_start */ + SET_EFFECT_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); + cn->unset_addr_list = env->unset_addr_list; + } + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = setup_subexp_call(an->target, env); + break; + } + } + break; + + default: + break; + } + + return r; +} +#endif + +/* divide different length alternatives in look-behind.
/* divide different length alternatives in look-behind.
  (?<=A|B) ==> (?<=A)|(?<=B)
  (?<!A|B) ==> (?<!A)(?<!B)

  `node` is the look-behind anchor; its target is read as a cons chain of
  alternatives.  The rewrite is done in place so that the address `node`
  keeps representing the whole construct.

  Returns 0 on success, ONIGERR_MEMORY if a new anchor node cannot be
  allocated (the tree is then left partially rewritten).
*/
static int
divide_look_behind_alternatives(Node* node)
{
  Node tmp_node;
  Node *head, *np, *insert_node;
  AnchorNode* an = &(NANCHOR(node));
  int anc_type = an->type;

  head = an->target;
  np = NCONS(head).left;
  /* swap the node contents: `node` now holds the first cons cell and
     `head` now holds the anchor */
  tmp_node = *node; *node = *head; *head = tmp_node;
  NCONS(node).left = head;      /* first element: the original anchor ...  */
  NANCHOR(head).target = np;    /* ... now wrapping only the first branch  */

  /* wrap the left side of every remaining cell in its own anchor of the
     same type */
  np = node;
  while ((np = NCONS(np).right) != NULL_NODE) {
    insert_node = onig_node_new_anchor(anc_type);
    CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
    NANCHOR(insert_node).target = NCONS(np).left;
    NCONS(np).left = insert_node;
  }

  /* for the negative form every cell of the chain is retyped to N_LIST,
     giving the (?<!A)(?<!B) sequence shown above */
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
    np = node;
    do {
      np->type = N_LIST; /* alt -> list */
    } while ((np = NCONS(np).right) != NULL_NODE);
  }
  return 0;
}
is_not_included(x, y, reg)) { + Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); + SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NEFFECT(node).target = en; + } + } + } + } + } + } + else if (type == N_EFFECT) { + EffectNode* en = &(NEFFECT(node)); + if (en->type == EFFECT_MEMORY) { + node = en->target; + goto retry; + } + } + return 0; +} + + +static int +divide_ambig_string_node_sub(regex_t* reg, int prev_ambig, + UChar* prev_start, UChar* prev, + UChar* end, Node*** tailp, Node** root) +{ + UChar *tmp, *wp; + Node* snode; + + if (prev_ambig != 0) { + tmp = prev_start; + wp = prev_start; + while (tmp < prev) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + snode = onig_node_new_str(prev_start, wp); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + NSTRING_SET_AMBIG(snode); + if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); + } + else { + snode = onig_node_new_str(prev_start, prev); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + + if (*tailp == (Node** )0) { + *root = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY); + *tailp = &(NCONS(*root).right); + } + else { + **tailp = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY); + *tailp = &(NCONS(**tailp).right); + } + + return 0; +} + +static int +divide_ambig_string_node(Node* node, regex_t* reg) +{ + StrNode* sn = &NSTRING(node); + int ambig, prev_ambig; + UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; + Node *root = NULL_NODE; + Node **tailp = (Node** )0; + int r; + + start = prev_start = p = sn->s; + end = sn->end; + if (p >= end) return 0; + + prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end); + + while (p < end) { + prev = p; + if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, + reg->ambig_flag, &p, end))) { + + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev, + 
end, &tailp, &root); + if (r != 0) return r; + + prev_ambig = ambig; + prev_start = prev; + } + } + + if (prev_start == start) { + if (prev_ambig != 0) { + NSTRING_SET_AMBIG(node); + tmp = start; + wp = start; + while (tmp < end) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node); + sn->end = wp; + } + } + else { + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end, + end, &tailp, &root); + if (r != 0) return r; + + swap_node(node, root); + onig_node_str_clear(root); /* should be after swap! */ + onig_node_free(root); /* free original string node */ + } + + return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT 512 +#define CEC_INFINITE_NUM 0x7fffffff + +#define CEC_IN_INFINITE_REPEAT (1<<0) +#define CEC_IN_FINITE_REPEAT (1<<1) +#define CEC_CONT_BIG_REPEAT (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ + int type; + int r = state; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_comb_exp_check(NCONS(node).left, r, env); + prev = NCONS(node).left; + } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_ALT: + { + int ret; + do { + ret = setup_comb_exp_check(NCONS(node).left, state, env); + r |= ret; + } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_QUANTIFIER: + { + int child_state = state; + int add_state = 0; + QuantifierNode* qn = &(NQUANTIFIER(node)); + Node* target = qn->target; + int var_num; + + if (! 
IS_REPEAT_INFINITE(qn->upper)) { + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == N_EFFECT) { + EffectNode* en = &(NEFFECT(qn->target)); + if (en->type == EFFECT_MEMORY) { + if (NTYPE(en->target) == N_QUANTIFIER) { + QuantifierNode* q = &(NQUANTIFIER(en->target)); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } + } + + if (state & CEC_IN_FINITE_REPEAT) { + qn->comb_exp_check_num = -1; + } + else { + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } + } + + r = setup_comb_exp_check(target, child_state, env); + r |= add_state; + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_MEMORY: + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; + + r = setup_comb_exp_check(en->target, state, env); + } + break; + + default: + r = setup_comb_exp_check(en->target, state, env); + break; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&(NCALL(node)))) + env->has_recursion = 1; + else + r = setup_comb_exp_check(NCALL(node).target, state, env); + break; +#endif + + default: + break; + } + + return r; 
+} +#endif + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. + */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NCONS(node).left, reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCONS(node).left, reg); + } + prev = NCONS(node).left; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_ALT: + do { + r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_CCLASS: + break; + + case N_STRING: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = divide_ambig_string_node(node, reg); + } + break; + + case N_CTYPE: + case N_ANYCHAR: + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + break; +#endif + + case N_BACKREF: + { + int i; + int* p; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif + SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + } + } + break; + + case N_QUANTIFIER: + { + OnigDistance d; + QuantifierNode* qn = &(NQUANTIFIER(node)); + Node* target = qn->target; + + if ((state & IN_REPEAT) != 0) { + 
qn->state |= NST_IN_REPEAT; + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + r = quantifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } +#endif +#if 0 + r = get_max_match_length(target, &d, env); + if (r == 0 && d == 0) { + /* ()* ==> ()?, ()+ ==> () */ + qn->upper = 1; + if (qn->lower > 1) qn->lower = 1; + if (NTYPE(target) == N_STRING) { + qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ + } + } +#endif + } + } + + state |= IN_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + r = setup_tree(target, reg, state, env); + if (r) break; + + /* expand string */ +#define EXPAND_STRING_MAX_LENGTH 100 + if (NTYPE(target) == N_STRING) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(target); + StrNode* sn = &(NSTRING(target)); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, NSTRING(target).flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r) break; + } + onig_node_free(target); + break; /* break case N_QUANTIFIER: */ + } + } + } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + if (qn->greedy && (qn->target_empty_info != 0)) { + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } + } +#endif + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_OPTION: + { + OnigOptionType options = reg->options; + reg->options = NEFFECT(node).option; + r = 
setup_tree(NEFFECT(node).target, reg, state, env); + reg->options = options; + } + break; + + case EFFECT_MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + r = setup_tree(en->target, reg, state, env); + break; + + case EFFECT_STOP_BACKTRACK: + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && + tqn->greedy != 0) { /* (?>a*), a*+ etc... */ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; + } + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(an->target, reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ + N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) + +#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) +#define ALLOWED_EFFECT_IN_LB_NOT 0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, state, env); + } + break; + + case 
ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + } + break; + } + } + break; + + default: + break; + } + + return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, + UChar skip[], int** int_skip) +{ + int i, len; + + len = end - s; + if (len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + + for (i = 0; i < len - 1; i++) + skip[s[i]] = len - 1 - i; + } + else { + if (IS_NULL(*int_skip)) { + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + } + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + + for (i = 0; i < len - 1; i++) + (*int_skip)[s[i]] = len - 1 - i; + } + return 0; +} + +#define OPT_EXACT_MAXLEN 24 + +typedef struct { + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ +} MinMaxLen; + +typedef struct { + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigAmbigType ambig_flag; + ScanEnv* scan_env; +} OptEnv; + +typedef struct { + int left_anchor; + int right_anchor; +} OptAncInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { + MinMaxLen len; + + OptAncInfo anc; + OptExactInfo exb; /* boundary */ + OptExactInfo exm; /* middle */ + OptExactInfo expr; /* prec read (?=...) 
*/ + + OptMapInfo map; /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ + static const short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } + else + return 4; /* Take it easy. */ +} + +static int +distance_value(MinMaxLen* mm) +{ + /* 1000 / (min-max-dist + 1) */ + static const short int dist_vals[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int d; + + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + + d = mm->max - mm->min; + if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return (int )dist_vals[d]; + else + return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distance_value(d1); + v2 *= distance_value(d2); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (d2->min < d1->min) return 1; + if (d2->min > d1->min) return -1; + return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ + return (a->min == b->min && 
a->max == b->max) ? 1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ + mml->min = min; + mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ + mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = distance_add(to->min, from->min); + to->max = distance_add(to->max, from->max); +} + +#if 0 +static void +add_len_mml(MinMaxLen* to, OnigDistance len) +{ + to->min = distance_add(to->min, len); + to->max = distance_add(to->max, len); +} +#endif + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ + if (to->min > from->min) to->min = from->min; + if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ + *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ + anc->left_anchor = 0; + anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ + *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, + OnigDistance left_len, OnigDistance right_len) +{ + clear_opt_anc_info(to); + + to->left_anchor = left->left_anchor; + if (left_len == 0) { + to->left_anchor |= right->left_anchor; + } + + to->right_anchor = right->right_anchor; + if (right_len == 0) { + to->right_anchor |= left->right_anchor; + } +} + +static int +is_left_anchor(int anc) +{ + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || + anc == ANCHOR_PREC_READ_NOT) + return 0; + + return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ + if ((to->left_anchor & anc) != 0) return 1; + + return ((to->right_anchor & anc) != 0 ? 
1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor |= anc; + else + to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor &= ~anc; + else + to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ + to->left_anchor &= add->left_anchor; + to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ + return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ + clear_mml(&ex->mmd); + clear_opt_anc_info(&ex->anc); + ex->reach_end = 0; + ex->ignore_case = 0; + ex->len = 0; + ex->s[0] = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ + *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +{ + int i, j, len; + UChar *p, *end; + OptAncInfo tanc; + + if (! to->ignore_case && add->ignore_case) { + if (to->len >= add->len) return ; /* avoid */ + + to->ignore_case = 1; + } + + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; + to->reach_end = (p == end ? add->reach_end : 0); + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); + if (! 
to->reach_end) tanc.right_anchor = 0; + copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, + UChar* s, UChar* end, int raw, OnigEncoding enc) +{ + int i, j, len; + UChar *p; + + for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ + int i, j, len; + + if (add->len == 0 || to->len == 0) { + clear_opt_exact_info(to); + return ; + } + + if (! is_equal_mml(&to->mmd, &add->mmd)) { + clear_opt_exact_info(to); + return ; + } + + for (i = 0; i < to->len && i < add->len; ) { + if (to->s[i] != add->s[i]) break; + len = enc_len(env->enc, to->s + i); + + for (j = 1; j < len; j++) { + if (to->s[i+j] != add->s[i+j]) break; + } + if (j < len) break; + i += len; + } + + if (! add->reach_end || i < add->len || i < to->len) { + to->reach_end = 0; + } + to->len = i; + to->ignore_case |= add->ignore_case; + + alt_merge_opt_anc_info(&to->anc, &add->anc); + if (! 
to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ + int v1, v2; + + v1 = now->len; + v2 = alt->len; + + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); + + if (now->len > 1) v1 += 5; + if (alt->len > 1) v2 += 5; + } + + if (now->ignore_case == 0) v1 *= 2; + if (alt->ignore_case == 0) v2 *= 2; + + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ + static const OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; + + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ + *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ + if (map->map[c] == 0) { + map->map[c] = 1; + map->value += map_position_value(enc, c); + } +} + +static int 
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, + OnigEncoding enc, OnigAmbigType ambig_flag) +{ + int i, n, len; + UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + OnigCodePoint code; + const OnigPairAmbigCodes* pccs; + OnigAmbigType amb; + + add_char_opt_map_info(map, p[0], enc); + code = ONIGENC_MBC_TO_CODE(enc, p, end); + + for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { + if ((amb & ambig_flag) == 0) continue; + + n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs); + for (i = 0; i < n; i++) { + if (pccs[i].from == code) { + len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf); + if (len < 0) return len; + add_char_opt_map_info(map, buf[0], enc); + } + } + } + return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ + static int z = 1<<15; /* 32768: something big value */ + + int v1, v2; + + if (alt->value == 0) return ; + if (now->value == 0) { + copy_opt_map_info(now, alt); + return ; + } + + v1 = z / now->value; + v2 = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE 20 + int ve, vm; + + if (m->value <= 0) return -1; + + ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + vm = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ + int i, val; + + /* if (! 
/* Stamp the position range `mmd` onto the exb, expr and map entries of
 * `opt`.  The exm entry is not touched here.
 */
static void
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
{
  copy_mml(&(opt->exb.mmd),  mmd);
  copy_mml(&(opt->expr.mmd), mmd);
  copy_mml(&(opt->map.mmd),  mmd);
}
select_opt_exact_info(enc, &to->exm, &add->exm); + + if (to->expr.len > 0) { + if (add->len.max > 0) { + if (to->expr.len > (int )add->len.max) + to->expr.len = add->len.max; + + if (to->expr.mmd.max == 0) + select_opt_exact_info(enc, &to->exb, &to->expr); + else + select_opt_exact_info(enc, &to->exm, &to->expr); + } + } + else if (add->expr.len > 0) { + copy_opt_exact_info(&to->expr, &add->expr); + } + + select_opt_map_info(&to->map, &add->map); + + add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ + alt_merge_opt_anc_info (&to->anc, &add->anc); + alt_merge_opt_exact_info(&to->exb, &add->exb, env); + alt_merge_opt_exact_info(&to->exm, &add->exm, env); + alt_merge_opt_exact_info(&to->expr, &add->expr, env); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); + + alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT 5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ + int type; + int r = 0; + + clear_node_opt_info(opt); + set_bound_node_opt_info(opt, &env->mmd); + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + OptEnv nenv; + NodeOptInfo nopt; + Node* nd = node; + + copy_opt_env(&nenv, env); + do { + r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right)); + } + break; + + case N_ALT: + { + NodeOptInfo nopt; + Node* nd = node; + + do { + r = optimize_node_left(NCONS(nd).left, &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } + } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right)); + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + int slen = sn->end - sn->s; + int is_raw = NSTRING_IS_RAW(node); + + if (! 
NSTRING_IS_AMBIG(node)) { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + NSTRING_IS_RAW(node), env->enc); + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); + } + else { + int n, max; + + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->ambig_flag); + if (r != 0) break; + } + + if (NSTRING_IS_AMBIG_REDUCE(node)) { + n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + max = slen; + } + set_mml(&opt->len, slen, max); + } + + if (opt->exb.len == slen) + opt->exb.reach_end = 1; + } + break; + + case N_CCLASS: + { + int i, z; + CClassNode* cc = &(NCCLASS(node)); + + /* no need to check ignore case. (setted in setup_tree()) */ + + if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + set_mml(&opt->len, min, max); + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); + } + } + break; + + case N_CTYPE: + { + int i, min, max; + + max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + if (max == 1) { + min = 1; + + switch (NCTYPE(node).type) { + case CTYPE_NOT_WORD: + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + break; + + case CTYPE_WORD: + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + break; + } + } + else { + min = ONIGENC_MBC_MINLEN(env->enc); + } + set_mml(&opt->len, min, max); + } + break; + + case N_ANYCHAR: + { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, min, max); + } + break; + + case N_ANCHOR: + switch (NANCHOR(node).type) { + case ANCHOR_BEGIN_BUF: + case ANCHOR_BEGIN_POSITION: + case ANCHOR_BEGIN_LINE: + case ANCHOR_END_BUF: + case ANCHOR_SEMI_END_BUF: + case ANCHOR_END_LINE: + add_opt_anc_info(&opt->anc, NANCHOR(node).type); + break; + + case ANCHOR_PREC_READ: + { + NodeOptInfo nopt; + + r = optimize_node_left(NANCHOR(node).target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); + + opt->expr.reach_end = 0; + + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } + } + break; + + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ + case ANCHOR_LOOK_BEHIND_NOT: + break; + } + break; + + case N_BACKREF: + { + int i; + int* backs; + OnigDistance min, max, tmin, tmax; + Node** nodes = SCANENV_MEM_NODES(env->scan_env); + BackrefNode* br = &(NBACKREF(node)); + + if (br->state & NST_RECURSION) { + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; + } + backs = BACKREFS_P(br); + r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + if (r == 0) set_mml(&opt->len, min, max); + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&(NCALL(node)))) + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + else { + OnigOptionType save = env->options; + env->options = NEFFECT(NCALL(node).target).option; + r = optimize_node_left(NCALL(node).target, opt, env); + env->options = save; + } + break; +#endif + + case N_QUANTIFIER: + { + int i; + OnigDistance min, max; + NodeOptInfo nopt; + QuantifierNode* qn = &(NQUANTIFIER(node)); + + r = optimize_node_left(qn->target, &nopt, env); + if (r) break; + + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (env->mmd.max == 0 && + NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } + } + else { + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i < qn->lower && + ! 
is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + } + + min = distance_multiply(nopt.len.min, qn->lower); + if (IS_REPEAT_INFINITE(qn->upper)) + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + else + max = distance_multiply(nopt.len.max, qn->upper); + + set_mml(&opt->len, min, max); + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_OPTION: + { + OnigOptionType save = env->options; + + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; + + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = ONIG_INFINITE_DISTANCE; + if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; + if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else +#endif + { + r = optimize_node_left(en->target, opt, env); + + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; + + case EFFECT_STOP_BACKTRACK: + r = optimize_node_left(en->target, opt, env); + break; + } + } + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "optimize_node_left: undefined node type %d\n", + NTYPE(node)); +#endif + r = ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ + int r; + + if (e->len == 0) return 0; + + if (e->ignore_case) { + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = 
reg->exact + e->len; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + } + else { + int allow_reverse; + + reg->exact = k_strdup(e->s, e->s + e->len); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + reg->exact_end = reg->exact + e->len; + + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + + if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { + r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, + reg->map, &(reg->int_map)); + if (r) return r; + + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + } + else { + reg->optimize = ONIG_OPTIMIZE_EXACT; + } + } + + reg->dmin = e->mmd.min; + reg->dmax = e->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + } + + return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ + int i; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + reg->map[i] = m->map[i]; + + reg->optimize = ONIG_OPTIMIZE_MAP; + reg->dmin = m->mmd.min; + reg->dmax = m->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + 1; + } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ + reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + + int r; + NodeOptInfo opt; + OptEnv env; + + env.enc = reg->enc; + env.options = reg->options; + env.ambig_flag = reg->ambig_flag; + env.scan_env = scan_env; + clear_mml(&env.mmd); + + r = optimize_node_left(node, &opt, &env); + if (r) return r; + + reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | 
ANCHOR_SEMI_END_BUF); + + if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + reg->anchor_dmin = opt.len.min; + reg->anchor_dmax = opt.len.max; + } + + if (opt.exb.len > 0 || opt.exm.len > 0) { + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + goto set_map; + } + else { + r = set_optimize_exact_info(reg, &opt.exb); + set_sub_anchor(reg, &opt.exb.anc); + } + } + else if (opt.map.value > 0) { + set_map: + set_optimize_map_info(reg, &opt.map); + set_sub_anchor(reg, &opt.map.anc); + } + else { + reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + if (opt.len.max == 0) + reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + print_optimize_info(stderr, reg); +#endif + return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ + reg->optimize = ONIG_OPTIMIZE_NONE; + reg->anchor = 0; + reg->anchor_dmin = 0; + reg->anchor_dmax = 0; + reg->sub_anchor = 0; + reg->exact_end = (UChar* )NULL; + reg->threshold_len = 0; + if (IS_NOT_NULL(reg->exact)) { + xfree(reg->exact); + reg->exact = (UChar* )NULL; + } +} + +#ifdef ONIG_DEBUG + +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + p += enc_len(enc, p); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ + if (a == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", a); + + fputs("-", f); + + if (b == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, 
"(%u)", b); +} + +static void +print_anchor(FILE* f, int anchor) +{ + int q = 0; + + fprintf(f, "["); + + if (anchor & ANCHOR_BEGIN_BUF) { + fprintf(f, "begin-buf"); + q = 1; + } + if (anchor & ANCHOR_BEGIN_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-line"); + } + if (anchor & ANCHOR_BEGIN_POSITION) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-pos"); + } + if (anchor & ANCHOR_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-buf"); + } + if (anchor & ANCHOR_SEMI_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "semi-end-buf"); + } + if (anchor & ANCHOR_END_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-line"); + } + if (anchor & ANCHOR_ANYCHAR_STAR) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "anychar-star"); + } + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (q) fprintf(f, ", "); + fprintf(f, "anychar-star-pl"); + } + + fprintf(f, "]"); +} + +static void +print_optimize_info(FILE* f, regex_t* reg) +{ + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; + + fprintf(f, "optimize: %s\n", on[reg->optimize]); + fprintf(f, " anchor: "); print_anchor(f, reg->anchor); + if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); + fprintf(f, "\n"); + + if (reg->optimize) { + fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + if (reg->exact) { + UChar *p; + fprintf(f, "exact: ["); + for (p = reg->exact; p < reg->exact_end; p++) { + fputc(*p, f); + } + fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); + } + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + int c, i, n = 0; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i]) n++; + + fprintf(f, "map: n=%d\n", n); + if (n > 0) { + c = 0; + fputc('[', f); + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if 
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } + } + fprintf(f, "]\n"); + } + } +} +#endif /* ONIG_DEBUG */ + + +static void +onig_free_body(regex_t* reg) +{ + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP + onig_names_free(reg); +#endif +} + +extern void +onig_free(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + onig_free_body(reg); + xfree(reg); + } +} + +#define REGEX_TRANSFER(to,from) do {\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ + xmemcpy(to, from, sizeof(regex_t));\ + xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ + THREAD_ATOMIC_START; + REGEX_TRANSFER(to, from); + THREAD_ATOMIC_END; +} + +#define REGEX_CHAIN_HEAD(reg) do {\ + while (IS_NOT_NULL((reg)->chain)) {\ + (reg) = (reg)->chain;\ + }\ +} while (0) + +extern void +onig_chain_link_add(regex_t* to, regex_t* add) +{ + THREAD_ATOMIC_START; + REGEX_CHAIN_HEAD(to); + to->chain = add; + THREAD_ATOMIC_END; +} + +extern void +onig_chain_reduce(regex_t* reg) +{ + regex_t *head, *prev; + + prev = reg; + head = prev->chain; + if (IS_NOT_NULL(head)) { + reg->state = ONIG_STATE_MODIFY; + while (IS_NOT_NULL(head->chain)) { + prev = head; + head = head->chain; + } + prev->chain = (regex_t* )NULL; + REGEX_TRANSFER(reg, head); + } +} + +#if 0 +extern int +onig_clone(regex_t** to, regex_t* from) +{ + int r, size; + regex_t* reg; + +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(from); + if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(from); + ONIG_STATE_INC(from); + } + } + 
else { + int n = 0; + while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(from); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + + r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + from->enc, ONIG_SYNTAX_DEFAULT); + if (r != 0) { + ONIG_STATE_DEC(from); + return r; + } + + xmemcpy(reg, from, sizeof(onig_t)); + reg->chain = (regex_t* )NULL; + reg->state = ONIG_STATE_NORMAL; + + if (from->p) { + reg->p = (UChar* )xmalloc(reg->alloc); + if (IS_NULL(reg->p)) goto mem_error; + xmemcpy(reg->p, from->p, reg->alloc); + } + + if (from->exact) { + reg->exact = (UChar* )xmalloc(from->exact_end - from->exact); + if (IS_NULL(reg->exact)) goto mem_error; + reg->exact_end = reg->exact + (from->exact_end - from->exact); + xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact); + } + + if (from->int_map) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map = (int* )xmalloc(size); + if (IS_NULL(reg->int_map)) goto mem_error; + xmemcpy(reg->int_map, from->int_map, size); + } + + if (from->int_map_backward) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map_backward = (int* )xmalloc(size); + if (IS_NULL(reg->int_map_backward)) goto mem_error; + xmemcpy(reg->int_map_backward, from->int_map_backward, size); + } + +#ifdef USE_NAMED_GROUP + reg->name_table = names_clone(from); /* names_clone is not implemented */ +#endif + + ONIG_STATE_DEC(from); + *to = reg; + return 0; + + mem_error: + ONIG_STATE_DEC(from); + return ONIGERR_MEMORY; +} +#endif + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigErrorInfo* einfo) +{ +#define COMPILE_INIT_SIZE 20 + + int r, init_size; + Node* root; + ScanEnv 
scan_env; +#ifdef USE_SUBEXP_CALL + UnsetAddrList uslist; +#endif + + reg->state = ONIG_STATE_COMPILING; + +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + + if (reg->alloc == 0) { + init_size = (pattern_end - pattern) * 2; + if (init_size <= 0) init_size = COMPILE_INIT_SIZE; + r = BBUF_INIT(reg, init_size); + if (r != 0) goto end; + } + else + reg->used = 0; + + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_null_check = 0; + reg->repeat_range_alloc = 0; + reg->repeat_range = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + reg->num_comb_exp_check = 0; +#endif + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); + + if (r != 0) goto err; + } +#endif + +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_init(&uslist, scan_env.num_call); + if (r != 0) goto err; + scan_env.unset_addr_list = &uslist; + r = setup_subexp_call(root, &scan_env); + if (r != 0) goto err_unset; + r = subexp_recursive_check_trav(root, &scan_env); + if (r < 0) goto err_unset; + r = subexp_inf_recursive_check_trav(root, &scan_env); + if (r != 0) goto err_unset; + + reg->num_call = scan_env.num_call; + } + else + reg->num_call = 0; +#endif + + r = setup_tree(root, reg, 0, &scan_env); + if (r != 0) goto err_unset; + + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + 
BIT_STATUS_ON_ALL(reg->bt_mem_end); + else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL + || scan_env.num_call == 0 +#endif + ) { + setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL + if (scan_env.has_recursion != 0) { + scan_env.num_comb_exp_check = 0; + } + else +#endif + if (scan_env.comb_exp_max_regnum > 0) { + int i; + for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } + } + } + } + + reg->num_comb_exp_check = scan_env.num_comb_exp_check; +#endif + + clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE + r = set_optimize_info_from_tree(root, reg, &scan_env); + if (r != 0) goto err_unset; +#endif + + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { + xfree(scan_env.mem_nodes_dynamic); + scan_env.mem_nodes_dynamic = (Node** )NULL; + } + + r = compile_tree(root, reg); + if (r == 0) { + r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_fix(&uslist, reg); + unset_addr_list_end(&uslist); + if (r) goto err; + } +#endif + + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + reg->stack_pop_level = STACK_POP_LEVEL_ALL; + else { + if (reg->bt_mem_start != 0) + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; + else + reg->stack_pop_level = STACK_POP_LEVEL_FREE; + } + } +#ifdef USE_SUBEXP_CALL + else if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); +#endif + print_compiled_byte_code_list(stderr, reg); +#endif + + end: + reg->state = ONIG_STATE_NORMAL; + return r; + + err_unset: +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + err: + if (IS_NOT_NULL(scan_env.error)) { 
+ if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; + einfo->par = scan_env.error; + einfo->par_end = scan_env.error_end; + } + } + + if (IS_NOT_NULL(root)) onig_node_free(root); + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) + xfree(scan_env.mem_nodes_dynamic); + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif + +static int onig_inited = 0; + +extern int +onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, + OnigEncoding enc, OnigSyntaxType* syntax) +{ + if (! onig_inited) + onig_init(); + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + (*reg)->state = ONIG_STATE_MODIFY; + + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { + option |= syntax->options; + option &= ~ONIG_OPTION_SINGLELINE; + } + else + option |= syntax->options; + + (*reg)->enc = enc; + (*reg)->options = option; + (*reg)->syntax = syntax; + (*reg)->optimize = 0; + (*reg)->exact = (UChar* )NULL; + (*reg)->int_map = (int* )NULL; + (*reg)->int_map_backward = (int* )NULL; + (*reg)->chain = (regex_t* )NULL; + + (*reg)->p = (UChar* )NULL; + (*reg)->alloc = 0; + (*reg)->used = 0; + (*reg)->name_table = (void* )NULL; + + (*reg)->ambig_flag = ambig_flag; + (*reg)->ambig_flag &= 
ONIGENC_SUPPORT_AMBIG_FLAG(enc); + + return 0; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + enc, syntax); + if (r) return r; + + r = onig_compile(*reg, pattern, pattern_end, einfo); + if (r) { + onig_free(*reg); + *reg = NULL; + } + return r; +} + +extern int +onig_init(void) +{ + if (onig_inited != 0) + return 0; + + onig_inited = 1; + + THREAD_SYSTEM_INIT; + THREAD_ATOMIC_START; + + onigenc_init(); + onigenc_set_default_caseconv_table((UChar* )0); + +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); +#endif + + THREAD_ATOMIC_END; + return 0; +} + + +extern int +onig_end(void) +{ + extern int onig_free_shared_cclass_table(void); + + THREAD_ATOMIC_START; + +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); +#endif + +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + +#ifdef USE_RECYCLE_NODE + onig_free_node_list(); +#endif + + onig_inited = 0; + + THREAD_ATOMIC_END; + THREAD_SYSTEM_END; + return 0; +} + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_STATE_CHECK 6 + +OnigOpInfoType OnigOpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + 
{ OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, 
"mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, + { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, + { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, + { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_ML_STAR, + "state-check-anychar-ml*", ARG_STATE_CHECK }, + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return 
OnigOpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + StateCheckNumType scn; + OnigCodePoint code; + UChar *q; + + fprintf(f, "[%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_STATE_CHECK: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + fprintf(f, ":%d", scn); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + 
p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enc_len(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + + case OP_BACKREFN_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case 
OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_AT_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":(%d)", addr); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_PUSH_LOOK_BEHIND_NOT: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:(%d)", len, addr); + break; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:(%d)", scn, addr); + break; + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", + *--bp); + } + } + fputs("]", f); + if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + int ncode; + UChar* bp = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "code length: %d\n", reg->used); + + ncode = 0; + while (bp < end) { + ncode++; + if (bp > reg->p) { + if (ncode % 5 == 0) + fprintf(f, "\n"); + else + fputs(" ", f); + } + onig_print_compiled_byte_code(f, bp, &bp, reg->enc); + } + + fprintf(f, "\n"); +} + +static void 
+print_indent_tree(FILE* f, Node* node, int indent) +{ + int i, type; + int add = 3; + UChar* p; + + Indent(f, indent); + if (IS_NULL(node)) { + fprintf(f, "ERROR: null node!!!\n"); + exit (0); + } + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + if (NTYPE(node) == N_LIST) + fprintf(f, "<list:%x>\n", (int )node); + else + fprintf(f, "<alt:%x>\n", (int )node); + + print_indent_tree(f, NCONS(node).left, indent + add); + while (IS_NOT_NULL(node = NCONS(node).right)) { + if (NTYPE(node) != type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); + } + print_indent_tree(f, NCONS(node).left, indent + add); + } + break; + + case N_STRING: + fprintf(f, "<string%s:%x>", + (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); + for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } + break; + + case N_CCLASS: + fprintf(f, "<cclass:%x>", (int )node); + if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node).mbuf) { + BBuf* bbuf = NCCLASS(node).mbuf; + for (i = 0; i < bbuf->used; i++) { + if (i > 0) fprintf(f, ","); + fprintf(f, "%0x", bbuf->p[i]); + } + } + break; + + case N_CTYPE: + fprintf(f, "<ctype:%x> ", (int )node); + switch (NCTYPE(node).type) { + case CTYPE_WORD: fputs("word", f); break; + case CTYPE_NOT_WORD: fputs("not word", f); break; + default: + fprintf(f, "ERROR: undefined ctype.\n"); + exit(0); + } + break; + + case N_ANYCHAR: + fprintf(f, "<anychar:%x>", (int )node); + break; + + case N_ANCHOR: + fprintf(f, "<anchor:%x> ", (int )node); + switch (NANCHOR(node).type) { + case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCHOR_END_BUF: fputs("end buf", f); break; + case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; + case ANCHOR_END_LINE: fputs("end line", f); break; + case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCHOR_BEGIN_POSITION: fputs("begin position", f); 
break; + + case ANCHOR_WORD_BOUND: fputs("word bound", f); break; + case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; + case ANCHOR_WORD_END: fputs("word end", f); break; +#endif + case ANCHOR_PREC_READ: fputs("prec read", f); break; + case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; + case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; + case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; + + default: + fprintf(f, "ERROR: undefined anchor type.\n"); + break; + } + break; + + case N_BACKREF: + { + int* p; + BackrefNode* br = &(NBACKREF(node)); + p = BACKREFS_P(br); + fprintf(f, "<backref:%x>", (int )node); + for (i = 0; i < br->back_num; i++) { + if (i > 0) fputs(", ", f); + fprintf(f, "%d", p[i]); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + { + CallNode* cn = &(NCALL(node)); + fprintf(f, "<call:%x>", (int )node); + p_string(f, cn->name_end - cn->name, cn->name); + } + break; +#endif + + case N_QUANTIFIER: + fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, + NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, + (NQUANTIFIER(node).greedy ? 
"" : "?")); + print_indent_tree(f, NQUANTIFIER(node).target, indent + add); + break; + + case N_EFFECT: + fprintf(f, "<effect:%x> ", (int )node); + switch (NEFFECT(node).type) { + case EFFECT_OPTION: + fprintf(f, "option:%d\n", NEFFECT(node).option); + print_indent_tree(f, NEFFECT(node).target, indent + add); + break; + case EFFECT_MEMORY: + fprintf(f, "memory:%d", NEFFECT(node).regnum); + break; + case EFFECT_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NEFFECT(node).target, indent + add); + break; + + default: + fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + break; + } + + if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && + type != N_EFFECT) + fprintf(f, "\n"); + fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ + print_indent_tree(f, node, 0); +} +#endif diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c new file mode 100644 index 0000000..958917e --- /dev/null +++ b/ext/mbstring/oniguruma/regenc.c @@ -0,0 +1,1028 @@ +/********************************************************************** + regenc.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init(void) +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding(void) +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + if (p < s) { + p += enc_len(enc, p); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + const UChar* start, const UChar* s, const UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + + if (p < s) { + if (prev) *prev = (const UChar* )p; + p += enc_len(enc, p); + } + else { + if (prev) *prev = (const UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); +} + +extern UChar* 
+onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + } + return (UChar* )s; +} + +extern UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) +{ + UChar* q = (UChar* )p; + while (n-- > 0) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + } + return (q <= end ? q : NULL); +} + +extern int +onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int n = 0; + UChar* q = (UChar* )p; + + while (q < end) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + n++; + } + return n; +} + +extern int +onigenc_strlen_null(OnigEncoding enc, const UChar* s) +{ + int n = 0; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return n; + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return n; + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + n++; + } +} + +extern int +onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) +{ + UChar* start = (UChar* )s; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return (int )(p - start); + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return (int )(p - start); + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + } +} + +#ifndef ONIG_RUBY_M17N + +#ifndef NOT_RUBY + +#define USE_APPLICATION_TO_LOWER_CASE_TABLE + +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; +#endif + +const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0; + +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE +static const UChar BuiltInAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', 
'\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', 
'\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncAsciiToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', 
'\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif + +const unsigned short OnigEncAsciiCtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', 
'\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', 
'\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', +}; +#endif + +extern void +onigenc_set_default_caseconv_table(const UChar* table) +{ + if (table == (const UChar* )0) { +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE + table = BuiltInAsciiToLowerCaseTable; +#else + return ; +#endif + } + + if (table != OnigEncAsciiToLowerCaseTable) { + OnigEncAsciiToLowerCaseTable = table; + } +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); +} + +const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { + { 0x41, 0x61 }, + { 0x42, 0x62 }, + { 0x43, 0x63 }, + { 0x44, 0x64 }, + { 0x45, 0x65 }, + { 0x46, 0x66 }, + { 0x47, 0x67 }, + { 0x48, 0x68 }, + { 0x49, 0x69 }, + { 0x4a, 0x6a }, + { 0x4b, 0x6b }, + { 0x4c, 0x6c }, + { 0x4d, 0x6d }, + { 0x4e, 0x6e }, + { 0x4f, 0x6f }, + { 0x50, 0x70 }, + { 0x51, 0x71 }, + { 0x52, 0x72 }, + { 0x53, 0x73 }, + { 0x54, 0x74 }, + { 0x55, 0x75 }, + { 0x56, 0x76 }, + { 0x57, 0x77 }, + { 0x58, 0x78 }, + { 0x59, 0x79 }, + { 0x5a, 0x7a }, + + { 0x61, 0x41 }, + { 0x62, 0x42 }, + { 0x63, 0x43 }, + { 0x64, 0x44 }, + { 0x65, 0x45 }, + { 
0x66, 0x46 }, + { 0x67, 0x47 }, + { 0x68, 0x48 }, + { 0x69, 0x49 }, + { 0x6a, 0x4a }, + { 0x6b, 0x4b }, + { 0x6c, 0x4c }, + { 0x6d, 0x4d }, + { 0x6e, 0x4e }, + { 0x6f, 0x4f }, + { 0x70, 0x50 }, + { 0x71, 0x51 }, + { 0x72, 0x52 }, + { 0x73, 0x53 }, + { 0x74, 0x54 }, + { 0x75, 0x55 }, + { 0x76, 0x56 }, + { 0x77, 0x57 }, + { 0x78, 0x58 }, + { 0x79, 0x59 }, + { 0x7a, 0x5a } +}; + +extern int +onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes)); + } + else { + return 0; + } +} + +extern int +onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag, + const OnigCompAmbigCodes** ccs) +{ + return 0; +} + +extern int +onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) +{ + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + 
{ 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
+  }
+  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/* Compound case-fold table for code 0xdf, which folds to the two-code
+ * sequences 0x53 0x53 and 0x73 0x73.  Only the NONASCII_CASE flag selects
+ * the table; any other flag yields 0 entries and leaves *ccs untouched. */
+extern int
+onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
+                                           const OnigCompAmbigCodes** ccs)
+{
+  static const OnigCompAmbigCodes folds[] = {
+    { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
+  };
+
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = folds;
+  return sizeof(folds) / sizeof(folds[0]);
+}
+
+/* Placeholder for encodings that provide no ctype code ranges: always
+ * answers ONIG_NO_SUPPORT_CONFIG and ignores its arguments. */
+extern int
+onigenc_not_support_get_ctype_code_range(int ctype,
+                             const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+{
+  return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* 1 when a byte is available at p (p < end) and that byte is 0x0a,
+ * otherwise 0. */
+extern int
+onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
+{
+  return (p < end && *p == 0x0a) ? 1 : 0;
+}
+
+/* for single byte encodings */
+/* Write the normalized form of the byte at **p into *lower (lower-cased
+ * through the ASCII table only when the ASCII_CASE flag is set), advance *p
+ * by one and return 1, the number of bytes written. */
+extern int
+onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
+                               UChar* lower)
+{
+  const int fold_ascii = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+
+  if (fold_ascii)
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
+  else
+    *lower = **p;
+
+  (*p)++;
+  return 1; /* return byte length of converted char to lower */
+}
+
+/* Consume one byte from *pp and report whether it is case-ambiguous under
+ * the ASCII_CASE flag; without that flag the answer is always FALSE. */
+extern int
+onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
+                               const UChar** pp, const UChar* end)
+{
+  const UChar* ch = *pp;
+
+  (*pp)++;
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*ch);
+}
+
+/* Every character of a single-byte encoding occupies one byte. */
+extern int
+onigenc_single_byte_mbc_enc_len(const UChar* p)
+{
+  return 1;
+}
+
+/* The code point of a single-byte character is the byte value itself. */
+extern OnigCodePoint
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return (OnigCodePoint )(*p);
+}
+
+extern int
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
+{
+
return 1;
+}
+
+/* First (and only) byte of a single-byte code: its low 8 bits. */
+extern int
+onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
+{
+  return (code & 0xff);
+}
+
+/* Store the low 8 bits of `code` in buf[0]; returns 1 (bytes written). */
+extern int
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )(code & 0xff);
+  return 1;
+}
+
+/* In a single-byte encoding every position is a character head, so `s` is
+ * returned unchanged. */
+extern UChar*
+onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  return (UChar* )s;
+}
+
+/* Reverse-match predicate that always answers TRUE. */
+extern int
+onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  return TRUE;
+}
+
+/* Reverse-match predicate that always answers FALSE. */
+extern int
+onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  return FALSE;
+}
+
+/* Build a code point from the multibyte character at p by concatenating its
+ * bytes big-endian.  Trailing bytes are read only while p < end, so a
+ * character truncated by `end` yields the value of the bytes present. */
+extern OnigCodePoint
+onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+  int i;
+  int len = enc_len(enc, p);
+  OnigCodePoint code = (OnigCodePoint )(*p++);
+
+  for (i = 1; i < len && p < end; i++) {
+    code = (code << 8) + *p++;
+  }
+  return code;
+}
+
+/* Normalize the character at *pp into `lower` and advance *pp past it.
+ * ASCII bytes are lower-cased only when the ASCII_CASE flag is set;
+ * multibyte characters are copied verbatim (the copy is skipped when
+ * `lower` already points at the source).  Returns the number of bytes the
+ * character occupies. */
+extern int
+onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
+                             const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar *src = *pp;
+  int len, i;
+
+  if (! ONIGENC_IS_MBC_ASCII(src)) {
+    len = enc_len(enc, src);
+    if (lower != src) {
+      for (i = 0; i < len; i++) {
+        lower[i] = src[i];
+      }
+    }
+    (*pp) += len;
+    return len; /* return byte length of converted to lower char */
+  }
+
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  (*pp)++;
+  return 1;
+}
+
+/* Advance *pp past one character and report whether it is case-ambiguous.
+ * Only ASCII bytes can be, and only under the ASCII_CASE flag; multibyte
+ * characters always answer FALSE. */
+extern int
+onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+                             const UChar** pp, const UChar* end)
+{
+  const UChar* ch = *pp;
+
+  if (! ONIGENC_IS_MBC_ASCII(ch)) {
+    (*pp) += enc_len(enc, ch);
+    return FALSE;
+  }
+
+  (*pp)++;
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*ch);
+}
+
+extern int
+onigenc_mb2_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xff00) != 0) return 2;
+
else return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb4_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff000000) != 0) { + first = (code >> 24) & 0xff; + } + else if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(enc, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = (UChar )((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0 || p != buf) { + *p++ = (UChar )((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0 || p != buf) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(enc, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + x = *sascii - c; + if (x) return x; + + sascii++; + p += enc_len(enc, p); + } + return 0; +} + +#else /* ONIG_RUBY_M17N */ + +extern int +onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype) +{ + switch (ctype) { + case ONIGENC_CTYPE_NEWLINE: + if (code == 0x0a) return 1; + break; + + case ONIGENC_CTYPE_ALPHA: + return m17n_isalpha(enc, code); + break; + case ONIGENC_CTYPE_BLANK: + return ONIGENC_IS_CODE_BLANK(enc, (int )(code)); + break; + case ONIGENC_CTYPE_CNTRL: + return m17n_iscntrl(enc, code); + break; + case ONIGENC_CTYPE_DIGIT: + return m17n_isdigit(enc, code); + break; + case ONIGENC_CTYPE_GRAPH: + return ONIGENC_IS_CODE_GRAPH(enc, (int )(code)); + break; + case ONIGENC_CTYPE_LOWER: + return m17n_islower(enc, code); + break; + case ONIGENC_CTYPE_PRINT: + return m17n_isprint(enc, code); + break; + case ONIGENC_CTYPE_PUNCT: + return m17n_ispunct(enc, code); + break; + case ONIGENC_CTYPE_SPACE: + return m17n_isspace(enc, code); + break; + case ONIGENC_CTYPE_UPPER: + return m17n_isupper(enc, code); + break; + case ONIGENC_CTYPE_XDIGIT: + return m17n_isxdigit(enc, code); + break; + case ONIGENC_CTYPE_WORD: + return m17n_iswchar(enc, code); + break; + case ONIGENC_CTYPE_ASCII: + return (code < 128 ? 
TRUE : FALSE);
+    break;
+  case ONIGENC_CTYPE_ALNUM:
+    return m17n_isalnum(enc, code);
+    break;
+  default:
+    break;
+  }
+
+  return 0;
+}
+/* Store `code` into buf with m17n_mbcput() and return the enc_len() value
+ * computed from the byte that m17n_firstbyte() yields for that code. */
+extern int
+onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int c, len;
+
+  m17n_mbcput(enc, code, buf);
+  c = m17n_firstbyte(enc, code);
+  len = enc_len(enc, c);
+  return len;
+}
+/* Decode the character at p, map it through m17n_tolower(), store the
+ * result in buf and return m17n_codelen() of the mapped code. */
+extern int
+onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+{
+  unsigned int c, low;
+
+  c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
+  low = m17n_tolower(enc, c);
+  m17n_mbcput(enc, low, buf);
+
+  return m17n_codelen(enc, low);
+}
+/* Advance *pp past one character and return TRUE when the ASCII_CASE flag
+ * is set and the character is upper or lower case per m17n_isupper() /
+ * m17n_islower(); FALSE otherwise.  `end` is not consulted. */
+extern int
+onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+                         UChar** pp, UChar* end)
+{
+  int len;
+  unsigned int c;
+  UChar* p = *pp;
+
+  len = enc_len(enc, *p);
+  (*pp) += len;   /* the caller's cursor moves on before the character is classified */
+  c = m17n_codepoint(enc, p, p + len);
+
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+    if (m17n_isupper(enc, c) || m17n_islower(enc, c))
+      return TRUE;
+  }
+
+  return FALSE;
+}
+/* Return the head of the character that contains `s`, or `s` itself when
+ * it already lies on a character boundary (or is at/before `start`). */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+{
+  UChar *p;
+  int len;
+
+  if (s <= start) return s;
+  p = s;
+
+  /* back up to the nearest lead byte, then walk forward one character at a
+     time until the next step would reach or pass s */
+  while (!m17n_islead(enc, *p) && p > start) p--;
+  while (p + (len = enc_len(enc, *p)) < s) {
+    p += len;
+  }
+  if (p + len == s) return s;   /* the character at p ends exactly at s */
+  return p;
+}
+/* Reverse matching is allowed exactly when ONIGENC_IS_SINGLEBYTE(enc)
+ * holds; `s` and `end` are ignored. */
+extern int
+onigenc_is_allowed_reverse_match(OnigEncoding enc,
+                                 const UChar* s, const UChar* end)
+{
+  return ONIGENC_IS_SINGLEBYTE(enc);
+}
+/* Empty in the ONIG_RUBY_M17N build: the table argument is ignored. */
+extern void
+onigenc_set_default_caseconv_table(UChar* table) { }
+
+#endif /* ONIG_RUBY_M17N */
diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h
new file mode 100644
index 0000000..58ee3e7
--- /dev/null
+++ b/ext/mbstring/oniguruma/regenc.h
@@ -0,0 +1,147 @@
+#ifndef REGENC_H
+#define REGENC_H
+/**********************************************************************
+  regenc.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright
(c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef RUBY_PLATFORM +#include "config.h" +#endif +#include "oniguruma.h" + +#ifndef NULL +#define NULL ((void* )0) +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* error codes */ +#define ONIGENCERR_MEMORY -5 +#define ONIGENCERR_TYPE_BUG -6 +#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401 + +#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) +#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL +#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) + + +#ifdef ONIG_RUBY_M17N + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF + +#else /* ONIG_RUBY_M17N */ + +#define USE_UNICODE_FULL_RANGE_CTYPE +/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + +/* for encoding system implementation (internal) */ +ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); + +/* methods for single byte encoding */ +ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); +ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, 
const UChar* end)); +ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); +ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); +ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); + +/* methods for multi byte encoding */ +ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); +ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end)); +ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); + +ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes)); + +/* in enc/unicode.c */ +ONIG_EXTERN int 
onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); + + +#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ + OnigEncISO_8859_1_ToLowerCaseTable[c] +#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ + OnigEncISO_8859_1_ToUpperCaseTable[c] +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0) + +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; +ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; + +#endif /* is not ONIG_RUBY_M17N */ + +ONIG_EXTERN int +onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); +ONIG_EXTERN UChar* +onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); + +/* defined in regexec.c, but used in enc/xxx.c */ +extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; +ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; + +#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] +#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] +#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ + ((OnigEncAsciiCtypeTable[code] & ctype) != 0) +#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ + ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) + +#endif /* REGENC_H */ diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c new file mode 100644 index 0000000..d6ec918 --- /dev/null +++ 
b/ext/mbstring/oniguruma/regerror.c @@ -0,0 +1,371 @@ +/********************************************************************** + regerror.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" +#include <stdio.h> /* for vsnprintf() */ + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +extern UChar* +onig_error_code_to_format(int code) +{ + char *p; + + if (code >= 0) return (UChar* )0; + + switch (code) { + case ONIG_MISMATCH: + p = "mismatch"; break; + case ONIG_NO_SUPPORT_CONFIG: + p = "no support in this configuration"; break; + case ONIGERR_MEMORY: + p = "fail to memory allocation"; break; + case ONIGERR_MATCH_STACK_LIMIT_OVER: + p = "match-stack limit over"; break; + case ONIGERR_TYPE_BUG: + p = "undefined type (bug)"; break; + case ONIGERR_PARSER_BUG: + p = "internal parser error (bug)"; break; + case ONIGERR_STACK_BUG: + p = "stack error (bug)"; break; + case ONIGERR_UNDEFINED_BYTECODE: + p = "undefined bytecode (bug)"; break; + case ONIGERR_UNEXPECTED_BYTECODE: + p = "unexpected bytecode (bug)"; break; + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + p = "default multibyte-encoding is not setted"; break; + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + p = "can't convert to wide-char on specified multibyte-encoding"; break; + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: + p = "end pattern at left brace"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: + p = "end pattern at left bracket"; break; + case ONIGERR_EMPTY_CHAR_CLASS: + p = "empty char-class"; break; + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: + p = "premature end of char-class"; break; + case ONIGERR_END_PATTERN_AT_ESCAPE: + p = "end pattern at escape"; break; + case ONIGERR_END_PATTERN_AT_META: + p = "end pattern at meta"; break; + case ONIGERR_END_PATTERN_AT_CONTROL: + p = "end pattern at control"; break; + case ONIGERR_META_CODE_SYNTAX: + p = "illegal meta-code syntax"; break; + case ONIGERR_CONTROL_CODE_SYNTAX: + p = "illegal control-code syntax"; break; + 
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + p = "char-class value at end of range"; break; + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + p = "char-class value at start of range"; break; + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + p = "unmatched range specifier in char-class"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + p = "target of repeat operator is not specified"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + p = "target of repeat operator is invalid"; break; + case ONIGERR_NESTED_REPEAT_OPERATOR: + p = "nested repeat operator"; break; + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: + p = "unmatched close parenthesis"; break; + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + p = "end pattern with unmatched parenthesis"; break; + case ONIGERR_END_PATTERN_IN_GROUP: + p = "end pattern in group"; break; + case ONIGERR_UNDEFINED_GROUP_OPTION: + p = "undefined group option"; break; + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: + p = "invalid POSIX bracket type"; break; + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: + p = "invalid pattern in look-behind"; break; + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: + p = "invalid repeat range {lower,upper}"; break; + case ONIGERR_TOO_BIG_NUMBER: + p = "too big number"; break; + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + p = "too big number for repeat range"; break; + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + p = "upper is smaller than lower in repeat range"; break; + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: + p = "empty range in char class"; break; + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + p = "mismatch multibyte code length in char-class range"; break; + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: + p = "too many multibyte code ranges are specified"; break; + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: + p = "too short multibyte code string"; break; + case ONIGERR_TOO_BIG_BACKREF_NUMBER: + p = "too big backref number"; break; + case 
ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP + p = "invalid backref number/name"; break; +#else + p = "invalid backref number"; break; +#endif + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + p = "too big wide-char value"; break; + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: + p = "too long wide-char value"; break; + case ONIGERR_INVALID_WIDE_CHAR_VALUE: + p = "invalid wide-char value"; break; + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: + p = "undefined name <%n> reference"; break; + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + p = "multiplex definition name <%n> call"; break; + case ONIGERR_NEVER_ENDING_RECURSION: + p = "never ending recursion"; break; + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name {%n}"; break; + case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: + p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: + p = "over thread pass limit count"; break; + + default: + p = "undefined error code"; break; + } + + return (UChar* )p; +} + + +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; 
+ + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (len + 5 <= buf_size) { + sprintf((char* )(&(buf[len])), "\\%03o", + (unsigned int)(code & 0377)); + len += 5; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enc_len(enc, p); + if (len >= buf_size) break; + } + + *is_over = ((p < end) ? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ +#define MAX_ERROR_PAR_LEN 30 + +extern int +#ifdef HAVE_STDARG_PROTOTYPES +onig_error_code_to_str(UChar* s, int code, ...) +#else +onig_error_code_to_str(s, code, va_alist) + UChar* s; + int code; + va_dcl +#endif +{ + UChar *p, *q; + OnigErrorInfo* einfo; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; + va_list vargs; + + va_init_list(vargs, code); + + switch (code) { + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + q = onig_error_code_to_format(code); + p = s; + while (*q != '\0') { + if (*q == '%') { + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; + } + else { + normal_char: + *p++ = *q++; + } + } + *p = '\0'; + len = p - s; + break; + + default: + q = onig_error_code_to_format(code); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); + xmemcpy(s, q, len); + s[len] = '\0'; + break; + } + + va_end(vargs); + return len; +} + + +void 
+#ifdef HAVE_STDARG_PROTOTYPES +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) +#else +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) + UChar buf[]; + int bufsize; + OnigEncoding enc; + UChar* pat; + UChar* pat_end; + const UChar *fmt; + va_dcl +#endif +{ + int n, need, len; + UChar *p, *s, *bp; + UChar bs[6]; + va_list args; + + va_init_list(args, fmt); + n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); + va_end(args); + + need = (pat_end - pat) * 4 + 4; + + if (n + need < bufsize) { + strcat((char* )buf, ": /"); + s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); + + p = pat; + while (p < pat_end) { + if (*p == MC_ESC(enc)) { + *s++ = *p++; + len = enc_len(enc, p); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )MC_ESC(enc); + *s++ = *p++; + } + else if (ONIGENC_IS_MBC_HEAD(enc, p)) { + len = enc_len(enc, p); + if (ONIGENC_MBC_MINLEN(enc) == 1) { + while (len-- > 0) *s++ = *p++; + } + else { /* for UTF16 */ + int blen; + + while (len-- > 0) { + sprintf((char* )bs, "\\%03o", *p++ & 0377); + blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (blen-- > 0) *s++ = *bp++; + } + } + } + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprintf((char* )bs, "\\%03o", *p++ & 0377); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (len-- > 0) *s++ = *bp++; + } + else { + *s++ = *p++; + } + } + + *s++ = '/'; + *s = '\0'; + } +} diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c new file mode 100644 index 0000000..918aa67 --- /dev/null +++ b/ext/mbstring/oniguruma/regexec.c @@ -0,0 +1,3949 @@ +/********************************************************************** + regexec.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + 
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#ifdef USE_CRNL_AS_LINE_TERMINATOR +#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) +#endif + +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + +static void +history_tree_clear(OnigCaptureTreeNode* node) +{ + int i; + + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); + } + } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; + } +} + +static void +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; + } +} + +static OnigCaptureTreeNode* +history_node_new(void) +{ + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); + CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} + +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +#define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + } + else { + n = parent->allocated * 2; + parent->childs = + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + } + CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + for (i 
= parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + history_tree_add_child(clone, child); + } + + return clone; +} + +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern void +onig_region_clear(OnigRegion* region) +{ + int i; + + for (i = 0; i < region->num_regs; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif +} + +extern int +onig_region_resize(OnigRegion* region, int n) +{ + region->num_regs = n; + + if (n < ONIG_NREGION) + n = ONIG_NREGION; + + if (region->allocated == 0) { + region->beg = (int* )xmalloc(n * sizeof(int)); + region->end = (int* )xmalloc(n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + else if (region->allocated < n) { + region->beg = (int* )xrealloc(region->beg, n * sizeof(int)); + region->end = (int* )xrealloc(region->end, n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + + return 0; +} + +extern int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at 
< 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void +onig_region_init(OnigRegion* region) +{ + region->num_regs = 0; + region->allocated = 0; + region->beg = (int* )0; + region->end = (int* )0; + region->history_root = (OnigCaptureTreeNode* )0; +} + +extern OnigRegion* +onig_region_new(void) +{ + OnigRegion* r; + + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + onig_region_init(r); + return r; +} + +extern void +onig_region_free(OnigRegion* r, int free_self) +{ + if (r) { + if (r->allocated > 0) { + if (r->beg) xfree(r->beg); + if (r->end) xfree(r->end); + r->allocated = 0; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif + if (free_self) xfree(r); + } +} + +extern void +onig_region_copy(OnigRegion* to, OnigRegion* from) +{ +#define RREGC_SIZE (sizeof(int) * from->num_regs) + int i; + + if (to == from) return; + + if (to->allocated == 0) { + if (from->num_regs > 0) { + to->beg = (int* )xmalloc(RREGC_SIZE); + to->end = (int* )xmalloc(RREGC_SIZE); + to->allocated = from->num_regs; + } + } + else if (to->allocated < from->num_regs) { + to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); + to->end = (int* )xrealloc(to->end, RREGC_SIZE); + to->allocated = from->num_regs; + } + + for (i = 0; i < from->num_regs; i++) { + to->beg[i] = from->beg[i]; + to->end[i] = from->end[i]; + } + to->num_regs = from->num_regs; + +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); + + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); + } +#endif +} + + +/** stack **/ +#define INVALID_STACK_INDEX -1 +typedef long StackIndex; + +typedef struct _StackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef 
USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + StackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is setted, if this stack type is MEM-START */ + StackIndex start; /* prev. info (for backtrack "(...)*" ) */ + StackIndex end; /* prev. info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} StackType; + +/* stack type */ +/* used by normal-POP */ +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 +/* handled by normal-POP */ +#define STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 +/* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for 
ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif +} MatchArg; + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while (0) +#else +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while (0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ + (msa).state_check_buff = (void* )xmalloc(size);\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ +} while (0) + +#define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while (0); +#else +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) 
xfree((msa).stack_p) +#endif + + + +#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ + if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ + stk_alloc = (StackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ + + sizeof(StackType) * (stack_num));\ + stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base;\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(StackType** arg_stk_base, StackType** arg_stk_end, + StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) +{ + unsigned int n; + StackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (StackType* )xmalloc(sizeof(StackType) * n * 2); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, stk_base, n * sizeof(StackType)); + n *= 2; + } + else { + n *= 2; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } + x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; 
+} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +#define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + int x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +#define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? 
(snum) : 0);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define ELSE_IF_STATE_CHECK_MARK(stk) + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + 
stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while (0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while (0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ +} while(0) + + +#ifdef ONIG_DEBUG +#define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } +#else +#define STACK_BASE_CHECK(p, at) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ +} while(0) + +#define 
STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == 
STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_END do {\ + StackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + 
else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ + if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while (0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + StackType* k = stk;\ + 
while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int ambig_flag, + UChar* s1, UChar** ps2, int mblen) +{ + UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *p1, *p2, *end, *s2, *end2; + int len1, len2; + + s2 = *ps2; + end = s1 + mblen; + end2 = s2 + mblen; + while (s1 < end) { + len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1); + len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + + +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#define IS_EMPTY_STR (str == end) + +#define DATA_ENSURE(n) \ + if (s + (n) > end) goto fail + +#define DATA_ENSURE_CHECK(n) (s + (n) <= end) + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, + StackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + StackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = 
k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. */ +} +#endif + +#ifdef USE_BACKREF_AT_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , StackType* top, StackType* stk_base + , int ignore_case, int ambig_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, ambig_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) 
{ + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_AT_LEVEL */ + + +#ifdef RUBY_PLATFORM + +typedef struct { + int state; + regex_t* reg; + MatchArg* msa; + StackType* stk_base; +} TrapEnsureArg; + +static VALUE +trap_ensure(VALUE arg) +{ + TrapEnsureArg* ta = (TrapEnsureArg* )arg; + + if (ta->state == 0) { /* trap_exec() is not normal return */ + ONIG_STATE_DEC_THREAD(ta->reg); + if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) + xfree(ta->stk_base); + + MATCH_ARG_FREE(*(ta->msa)); + } + + return Qnil; +} + +static VALUE +trap_exec(VALUE arg) +{ + TrapEnsureArg* ta; + + rb_trap_exec(); + + ta = (TrapEnsureArg* )arg; + ta->state = 1; /* normal return */ + return Qnil; +} + +extern void +onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base) +{ + VALUE arg; + TrapEnsureArg ta; + + ta.state = 0; + ta.reg = reg; + ta.msa = msa; + ta.stk_base = stk_base; + arg = (VALUE )(&ta); + rb_ensure(trap_exec, arg, trap_ensure, arg); +} + +#define CHECK_INTERRUPT_IN_MATCH_AT do {\ + if (rb_trap_pending) {\ + if (! 
rb_prohibit_interrupt) {\ + onig_exec_trap(reg, msa, stk_base);\ + }\ + }\ +} while (0) +#else +#define CHECK_INTERRUPT_IN_MATCH_AT +#endif /* RUBY_PLATFORM */ + +#ifdef ONIG_DEBUG_STATISTICS + +#define USE_TIMEOFDAY + +#ifdef USE_TIMEOFDAY +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +static struct timeval ts, te; +#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +#else +#ifdef HAVE_SYS_TIMES_H +#include <sys/times.h> +#endif +static struct tms ts, te; +#define GETTIME(t) times(&(t)) +#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +#endif + +static int OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +#define STAT_OP_IN(opcode) do {\ + if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while (0) + +#define STAT_OP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while (0) + +#ifdef RUBY_PLATFORM + +/* + * :nodoc: + */ +static VALUE onig_stat_print(void) +{ + onig_print_statistics(stderr); + return Qnil; +} +#endif + +extern void onig_statistics_init(void) +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; + +#ifdef RUBY_PLATFORM + rb_define_global_function("onig_stat_print", onig_stat_print, 0); +#endif +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +#define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + 
MaxStackDepth = stk - stk_base;\ +} while (0) + +#else +#define STACK_INC stk++ + +#define STAT_OP_IN(opcode) +#define STAT_OP_OUT +#endif + +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +static int +is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); + } + + if (IS_CCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return is_code_in_cc(len, code, cc); +} + + +/* matching region of POSIX API */ +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} posix_regmatch_t; + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. */ +static int +match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, + UChar* sprev, MatchArg* msa) +{ + static UChar FinishCode[] = { OP_FINISH }; + + int i, n, num_mem, best_len, pop_level; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigAmbigType ambig_flag = reg->ambig_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + char *alloca_base; + StackType *stk_alloc, *stk_base, *stk, *stk_end; + StackType *stkp; /* used as any purpose. 
*/ + StackIndex si; + StackIndex *repeat_stk; + StackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif + n = reg->num_repeat + reg->num_mem * 2; + + STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (StackIndex* )alloca_base; + + mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + num_mem; + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ + for (i = 1; i <= num_mem; i++) { + mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + } + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", + (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); +#endif + + STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + while (1) { +#ifdef ONIG_DEBUG_MATCH + { + UChar *q, *bp, buf[50]; + int len; + fprintf(stderr, "%4d> \"", (int )(s - str)); + bp = buf; + for (i = 0, q = s; i < 7 && q < end; i++) { + len = enc_len(encode, q); + while (len-- > 0) *bp++ = *q++; + } + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } + else { xmemcpy(bp, "\"", 1); bp += 1; } + *bp = 0; + fputs(buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "\n"); + } +#endif + + sbegin = s; + switch (*p++) { + case OP_END: STAT_OP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = 
n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif + best_len = n; + region = msa->region; + if (region) { +#ifdef USE_POSIX_REGION_OPTION + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = sstart - str; + rmt[0].rm_eo = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { +#endif /* USE_POSIX_REGION_OPTION */ + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ +#ifdef USE_POSIX_REGION_OPTION + } /* else IS_POSIX_REGION() */ +#endif + } /* if (region) */ + } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif + STAT_OP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && s < end) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. 
*/ + goto finish; + break; + + case OP_EXACT1: STAT_OP_IN(OP_EXACT1); +#if 0 + DATA_ENSURE(1); + if (*p != *s) goto fail; + p++; s++; +#endif + if (*p != *s++) goto fail; + DATA_ENSURE(0); + p++; + STAT_OP_OUT; + break; + + case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + + DATA_ENSURE(1); + ss = s; + sp = p; + + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + STAT_OP_OUT; + break; + + case OP_EXACT2: STAT_OP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT3: STAT_OP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT4: STAT_OP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT5: STAT_OP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTN: STAT_OP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); + { + int len; + UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + 
DATA_ENSURE(1); + ss = s; + sp = p; + + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + } + + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + break; + + case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 3; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + STAT_OP_OUT; + continue; + break; + + case OP_CCLASS: STAT_OP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef 
)p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + STAT_OP_OUT; + break; + + case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enc_len(encode, s); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + STAT_OP_OUT; + break; + + case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + STAT_OP_OUT; + break; + + case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! 
ONIGENC_IS_MBC_HEAD(encode, s)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enc_len(encode, s); + + if (s + mb_len > end) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + STAT_OP_OUT; + break; + + case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + STAT_OP_OUT; + break; + + case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enc_len(encode, s); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (is_code_in_cc(mb_len, code, node) == 0) goto fail; + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enc_len(encode, s); + DATA_ENSURE(n); + s += n; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: 
STAT_OP_IN(OP_ANYCHAR_ML_STAR); + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (s < end) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + p++; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (s < end) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enc_len(encode, s); + if (n >1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + STAT_OP_OUT; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (s < end) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + STAT_OP_OUT; + break; + + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (s < end) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enc_len(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + STAT_OP_OUT; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_WORD: STAT_OP_IN(OP_WORD); + DATA_ENSURE(1); + if (! 
ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + STAT_OP_OUT; + continue; + break; + + case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + STAT_OP_OUT; + continue; + break; + +#ifdef USE_WORD_BEGIN_END + case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + STAT_OP_OUT; + continue; + } + } + goto fail; + break; + + case OP_WORD_END: STAT_OP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + STAT_OP_OUT; + continue; + } + } + goto fail; + break; +#endif + + case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_END_BUF: STAT_OP_IN(OP_END_BUF); + if (! 
ON_STR_END(s)) goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + STAT_OP_OUT; + continue; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + STAT_OP_OUT; + continue; + } + goto fail; + break; + + case OP_END_LINE: STAT_OP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + STAT_OP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { + STAT_OP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + STAT_OP_OUT; + continue; + } +#endif + goto fail; + break; + + case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? 
*/ + STAT_OP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enc_len(encode, s))) { + STAT_OP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enc_len(encode, s); + if (ON_STR_END(ss + enc_len(encode, ss))) { + STAT_OP_OUT; + continue; + } + } +#endif + goto fail; + break; + + case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); + if (s != msa->start) + goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (StackIndex )((void* )s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (StackIndex )((void* )s); + STAT_OP_OUT; + continue; + break; + +#ifdef USE_SUBEXP_CALL + case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (StackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + STAT_OP_OUT; + continue; + break; +#endif + + case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); + mem = 1; + goto backref; + break; + + case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); + mem = 2; + goto backref; + break; + + case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. 
*/ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(ambig_flag, pstart, &s, n); + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + STAT_OP_OUT; + continue; + } + break; + +#ifdef USE_BACKREF_AT_LEVEL + case OP_BACKREF_AT_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag + , (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + STAT_OP_OUT; + continue; + } + + break; +#endif + + case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); + 
GET_OPTION_INC(option, p); + STACK_PUSH_ALT(p, s, sprev); + p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; + STAT_OP_OUT; + continue; + break; + + case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); + GET_OPTION_INC(option, p); + STAT_OP_OUT; + continue; + break; + + case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + STAT_OP_OUT; + continue; + break; + +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + STAT_OP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + 
fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + STAT_OP_OUT; + continue; + break; +#endif + + case OP_JUMP: STAT_OP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_PUSH: STAT_OP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + STAT_OP_OUT; + continue; + break; + + case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + } + STAT_OP_OUT; + continue; + break; + + case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + STAT_OP_OUT; + continue; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_POP: STAT_OP_IN(OP_POP); + STACK_POP_ONE; + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK(1)) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + } + p += (addr + 1); + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + } + p++; + STAT_OP_OUT; + 
continue; + break; + + case OP_REPEAT: STAT_OP_IN(OP_REPEAT); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } + } + STAT_OP_OUT; + continue; + break; + + case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } + } + STAT_OP_OUT; + continue; + break; + + case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; + + case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + + case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); + STACK_PUSH_POS(s, sprev); + STAT_OP_OUT; + continue; + break; + + case OP_POP_POS: STAT_OP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + break; + + case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + break; + + case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + STAT_OP_OUT; + continue; + break; + + case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + STAT_OP_OUT; + continue; + break; + + case 
OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. /(?<!XXX)a/.match("a") + If you want to change to fail, replace following line. */ + p += addr; + /* goto fail; */ + } + else { + STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); + s = q; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + } + STAT_OP_OUT; + continue; + break; + + case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); + STACK_POP_TIL_LOOK_BEHIND_NOT; + goto fail; + break; + +#ifdef USE_SUBEXP_CALL + case OP_CALL: STAT_OP_IN(OP_CALL); + GET_ABSADDR_INC(addr, p); + STACK_PUSH_CALL_FRAME(p); + p = reg->p + addr; + STAT_OP_OUT; + continue; + break; + + case OP_RETURN: STAT_OP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + STAT_OP_OUT; + continue; + break; +#endif + + case OP_FINISH: + goto finish; + break; + + fail: + STAT_OP_OUT; + /* fall */ + case OP_FAIL: STAT_OP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + + STAT_OP_OUT; + continue; + break; + + default: + goto bytecode_error; + + } /* end of switch */ + sprev = sbegin; + } /* end of while(1) */ + + finish: + STACK_SAVE; + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + return 
ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s += enc_len(enc, s); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int ambig_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + const UChar* tsave; + const UChar* psave; + + tsave = t; + psave = p; + + while (t < tend) { + lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) { + return 0; + } + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int ambig_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + return s; + + s += enc_len(enc, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while (s >= text) { + if (*s == *target) { + p = s + 1; + t = 
target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int ambig_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while (s >= text) { + if (str_lower_case_match(enc, ambig_flag, + target, target_end, s, text_end)) + return s; + + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + int skip, tlen1; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )s; + + skip = reg->map[*se]; + t = s; + do { + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); + } + } + else { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )s; + + skip = reg->int_map[*se]; + t = s; + do { + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); + } + } + + return (UChar* )NULL; +} + +static UChar* 
+bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (target_end - target) - 1; + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->map[*s]; + } + } + else { /* see int_map[] */ + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->int_map[*s]; + } + } + return (UChar* )NULL; +} + +static int +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) + +{ + int i, len; + + if (IS_NULL(*skip)) { + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; + } + + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + (*skip)[i] = len; + + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + + return 0; +} + +static UChar* +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + const UChar *s, *t, *p; + + s = text_end - (target_end - target); + if (text_start < s) + s = text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + + while (s >= text) { + p = s; + t = target; + while (t < target_end && *p == *t) { + p++; t++; + } + if (t == target_end) + return (UChar* )s; + + s -= reg->int_map_backward[*s]; + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range) +{ + const UChar *s = text; + + while (s < text_range) { + if 
(map[*s]) return (UChar* )s; + + s += enc_len(enc, s); + } + return (UChar* )NULL; +} + +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s = onigenc_get_prev_char_head(enc, adjust_text, s); + } + return (UChar* )NULL; +} + +extern int +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + int r; + UChar *prev; + MatchArg msa; + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + + MATCH_ARG_INIT(msa, option, region, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + if (region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + } + else + r = 0; + + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); + r = match_at(reg, str, end, at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + return r; +} + +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + 
fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", + (int )str, (int )end, (int )s, (int )range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { + UChar *q = p + reg->dmin; + while (p < q) p += enc_len(reg->enc, p); + } + } + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->ambig_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range); + break; + } + + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enc_len(reg->enc, p); + goto retry; + } + + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) + goto retry_gate; + break; + } + } + + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), p);
+      }
+    }
+    else {
+      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+        *low = p - reg->dmax;
+        if (*low > s) {
+          *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+                                                              *low, (const UChar** )low_prev);
+          if (low_prev && IS_NULL(*low_prev))
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : s), *low);
+        }
+        else {
+          if (low_prev)
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : str), *low);
+        }
+      }
+    }
+    /* no need to adjust *high: it is used only as a range check */
+    *high = p - reg->dmin;
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr,
+    "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
+            (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+#endif
+    return 1; /* success */
+  }
+
+  return 0; /* fail */
+}
+
+/* Builds the backward skip table on first use; see backward_search_range(). */
+static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
+                                    int** skip));
+
+/* Below this distance (s - range) no backward skip table is built and the
+   plain backward search is used instead. */
+#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
+
+/*
+ * Backward pre-search using the pattern's optimization data (exact
+ * string or character map, selected by reg->optimize).  The scan starts
+ * at s; range (shifted by reg->dmin) and adjrange are the bounds handed
+ * to the search helpers.  A hit that violates reg->sub_anchor
+ * (begin-line / end-line) is rejected and the search is retried from
+ * the previous character.
+ *
+ * Returns 1 on success; when reg->dmax is finite *low is set to
+ * p - dmax and *high to the right-adjusted p - dmin, otherwise both are
+ * left untouched.  Returns 0 when nothing is found, or the non-zero
+ * result of set_bm_backward_skip() when the skip table cannot be built.
+ */
+static int
+backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
+                      UChar* s, const UChar* range, UChar* adjrange,
+                      UChar** low, UChar** high)
+{
+  int r;
+  UChar *p;
+
+  range += reg->dmin;
+  p = s;
+
+ retry:
+  switch (reg->optimize) {
+  case ONIG_OPTIMIZE_EXACT:
+  exact_method:
+    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
+                             range, adjrange, end, p);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_IC:
+    p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+                                reg->exact, reg->exact_end,
+                                range, adjrange, end, p);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM:
+  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+    /* The backward skip table is created lazily, and only when the
+       distance to scan is at least the threshold. */
+    if (IS_NULL(reg->int_map_backward)) {
+      if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
+        goto exact_method;
+
+      r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
+                               &(reg->int_map_backward));
+      if (r) return r;
+    }
+    p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
+                           end, p);
+    break;
+
+  case ONIG_OPTIMIZE_MAP:
+    p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
+    break;
+  }
+
+  if (p) {
+    if (reg->sub_anchor) {
+      UChar* prev;
+
+      switch (reg->sub_anchor) {
+      case ANCHOR_BEGIN_LINE:
+        /* the hit must be at the string start or follow a newline */
+        if (!ON_STR_BEGIN(p)) {
+          prev = onigenc_get_prev_char_head(reg->enc, str, p);
+          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+            p = prev;
+            goto retry;
+          }
+        }
+        break;
+
+      case ANCHOR_END_LINE:
+        if (ON_STR_END(p)) {
+          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+          if (IS_NULL(prev)) goto fail;
+          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+            p = prev;
+            goto retry;
+          }
+        }
+        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                 ) {
+          p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+          if (IS_NULL(p)) goto fail;
+          goto retry;
+        }
+        break;
+      }
+    }
+
+    /* no need to adjust *high: it is used only as a range check */
+    if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+      *low = p - reg->dmax;
+      *high = p - reg->dmin;
+      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
+    }
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+            (int )(*low - str), (int )(*high - str));
+#endif
+    return 1; /* success */
+  }
+
+ fail:
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "backward_search_range: fail.\n");
+#endif
+  return 0; /* fail */
+}
+
+
+/*
+ * Search str..end for a match of reg.
+ *
+ *   start, range: candidate positions begin at start.  When
+ *                 range > start they advance forward toward range,
+ *                 otherwise they step backward toward range.
+ *   region:       when non-NULL (and, if USE_POSIX_REGION_OPTION is
+ *                 compiled in, option does not ask for a POSIX region)
+ *                 it is resized and cleared for reg->num_mem + 1
+ *                 entries before searching.
+ *   option:       forwarded to MATCH_ARG_INIT.
+ *
+ * Returns s - str for the position at which match_at() succeeded,
+ * ONIG_MISMATCH when no position matches or start lies outside
+ * [str, end], and otherwise the error value produced by match_at(),
+ * onig_region_resize_clear() or the thread-state wait loop.
+ */
+extern int
+onig_search(regex_t* reg, const UChar* str, const UChar* end,
+            const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
+{
+  int r;
+  UChar *s, *prev;
+  MatchArg msa;
+  const UChar *orig_start = start;
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+  /* Register this search on the regex state; while the state is below
+     ONIG_STATE_NORMAL yield up to THREAD_PASS_LIMIT_COUNT times. */
+ start:
+  THREAD_ATOMIC_START;
+  if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+    ONIG_STATE_INC(reg);
+    if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+      onig_chain_reduce(reg);
+      ONIG_STATE_INC(reg);
+    }
+  }
+  else {
+    int n;
+
+    THREAD_ATOMIC_END;
+    n = 0;
+    while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+      if (++n > THREAD_PASS_LIMIT_COUNT)
+        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+      THREAD_PASS;
+    }
+    goto start;
+  }
+  THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr,
+     "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+     (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+  if (region
+#ifdef USE_POSIX_REGION_OPTION
+      && !IS_POSIX_REGION(option)
+#endif
+      ) {
+    r = onig_region_resize_clear(region, reg->num_mem + 1);
+    if (r) goto finish_no_msa;
+  }
+
+  if (start > end || start < str) goto mismatch_no_msa;
+
+/* Try match_at() at s: jump to `match` on success (with FIND_LONGEST the
+   scan continues instead) and to `finish` on an error result. */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK \
+  r = match_at(reg, str, end, s, prev, &msa);\
+  if (r != ONIG_MISMATCH) {\
+    if (r >= 0) {\
+      if (! IS_FIND_LONGEST(reg->options)) {\
+        goto match;\
+      }\
+    }\
+    else goto finish; /* error */ \
+  }
+#else
+#define MATCH_AND_RETURN_CHECK \
+  r = match_at(reg, str, end, s, prev, &msa);\
+  if (r != ONIG_MISMATCH) {\
+    if (r >= 0) {\
+      goto match;\
+    }\
+    else goto finish; /* error */ \
+  }
+#endif
+
+  /* anchor optimize: resume search range */
+  if (reg->anchor != 0 && str < end) {
+    UChar *min_semi_end, *max_semi_end;
+
+    if (reg->anchor & ANCHOR_BEGIN_POSITION) {
+      /* search start-position only */
+    begin_position:
+      if (range > start)
+        range = start + 1;
+      else
+        range = start;
+    }
+    else if (reg->anchor & ANCHOR_BEGIN_BUF) {
+      /* search str-position only */
+      if (range > start) {
+        if (start != str) goto mismatch_no_msa;
+        range = str + 1;
+      }
+      else {
+        if (range <= str) {
+          start = str;
+          range = str;
+        }
+        else
+          goto mismatch_no_msa;
+      }
+    }
+    else if (reg->anchor & ANCHOR_END_BUF) {
+      min_semi_end = max_semi_end = (UChar* )end;
+
+      /* Clamp start/range using anchor_dmin / anchor_dmax measured from
+         the (semi-)end positions. */
+    end_buf:
+      if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
+        goto mismatch_no_msa;
+
+      if (range > start) {
+        if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+          start = min_semi_end - reg->anchor_dmax;
+          if (start < end)
+            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
+          else { /* match with empty at end */
+            start = onigenc_get_prev_char_head(reg->enc, str, end);
+          }
+        }
+        if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+          range = max_semi_end - reg->anchor_dmin + 1;
+        }
+
+        if (start >= range) goto mismatch_no_msa;
+      }
+      else {
+        if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+          range = min_semi_end - reg->anchor_dmax;
+        }
+        if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+          start = max_semi_end - reg->anchor_dmin;
+          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+        }
+        if (range > start) goto mismatch_no_msa;
+      }
+    }
+    else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
+      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+
+      max_semi_end = (UChar* )end;
+      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
+        min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
+        if (IS_NOT_NULL(pre_end) &&
+            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+          min_semi_end = pre_end;
+        }
+#endif
+        if (min_semi_end > str && start <= min_semi_end) {
+          goto end_buf;
+        }
+      }
+      else {
+        min_semi_end = (UChar* )end;
+        goto end_buf;
+      }
+    }
+    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
+      goto begin_position;
+    }
+  }
+  else if (str == end) { /* empty string */
+    static const UChar* address_for_empty_string = (UChar* )"";
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr, "onig_search: empty string.\n");
+#endif
+
+    if (reg->threshold_len == 0) {
+      /* run the single match attempt against a static empty string */
+      start = end = str = address_for_empty_string;
+      s = (UChar* )start;
+      prev = (UChar* )NULL;
+
+      MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+      msa.state_check_buff = (void* )0;
+      msa.state_check_buff_size = 0;
+#endif
+      MATCH_AND_RETURN_CHECK;
+      goto mismatch;
+    }
+    goto mismatch_no_msa;
+  }
+
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
+          (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+  MATCH_ARG_INIT(msa, option, region, orig_start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  {
+    int offset = (MIN(start, range) - str);
+    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+  }
+#endif
+
+  s = (UChar* )start;
+  if (range > start) { /* forward search */
+    if (s > str)
+      prev = onigenc_get_prev_char_head(reg->enc, str, s);
+    else
+      prev = (UChar* )NULL;
+
+    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+      UChar *sch_range, *low, *high, *low_prev;
+
+      /* widen the pre-search limit by dmax, capped at end */
+      sch_range = (UChar* )range;
+      if (reg->dmax != 0) {
+        if (reg->dmax == ONIG_INFINITE_DISTANCE)
+          sch_range = (UChar* )end;
+        else {
+          sch_range += reg->dmax;
+          if (sch_range > end) sch_range = (UChar* )end;
+        }
+      }
+
+      if ((end - start) < reg->threshold_len)
+        goto mismatch;
+
+      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+        /* finite dmax: only positions in [low, high] are tried */
+        do {
+          if (! forward_search_range(reg, str, end, s, sch_range,
+                                     &low, &high, &low_prev)) goto mismatch;
+          if (s < low) {
+            s = low;
+            prev = low_prev;
+          }
+          while (s <= high) {
+            MATCH_AND_RETURN_CHECK;
+            prev = s;
+            s += enc_len(reg->enc, s);
+          }
+        } while (s < range);
+        goto mismatch;
+      }
+      else { /* check only. */
+        if (! forward_search_range(reg, str, end, s, sch_range,
+                                   &low, &high, (UChar** )NULL)) goto mismatch;
+
+        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+          /* after a failed attempt, skip ahead to just past the next
+             newline before trying again */
+          do {
+            MATCH_AND_RETURN_CHECK;
+            prev = s;
+            s += enc_len(reg->enc, s);
+
+            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
+              prev = s;
+              s += enc_len(reg->enc, s);
+            }
+          } while (s < range);
+          goto mismatch;
+        }
+      }
+    }
+
+    do {
+      MATCH_AND_RETURN_CHECK;
+      prev = s;
+      s += enc_len(reg->enc, s);
+    } while (s < range);
+
+    if (s == range) { /* because empty match with /$/. */
+      MATCH_AND_RETURN_CHECK;
+    }
+  }
+  else { /* backward search */
+    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+      UChar *low, *high, *adjrange, *sch_start;
+
+      if (range < end)
+        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+      else
+        adjrange = (UChar* )end;
+
+      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+          (end - range) >= reg->threshold_len) {
+        do {
+          sch_start = s + reg->dmax;
+          if (sch_start > end) sch_start = (UChar* )end;
+          if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+                                    &low, &high) <= 0)
+            goto mismatch;
+
+          if (s > high)
+            s = high;
+
+          while (s >= low) {
+            prev = onigenc_get_prev_char_head(reg->enc, str, s);
+            MATCH_AND_RETURN_CHECK;
+            s = prev;
+          }
+        } while (s >= range);
+        goto mismatch;
+      }
+      else { /* check only. */
+        if ((end - range) < reg->threshold_len) goto mismatch;
+
+        sch_start = s;
+        if (reg->dmax != 0) {
+          if (reg->dmax == ONIG_INFINITE_DISTANCE)
+            sch_start = (UChar* )end;
+          else {
+            sch_start += reg->dmax;
+            if (sch_start > end) sch_start = (UChar* )end;
+            else
+              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+                                                        start, sch_start);
+          }
+        }
+        if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+                                  &low, &high) <= 0) goto mismatch;
+      }
+    }
+
+    do {
+      prev = onigenc_get_prev_char_head(reg->enc, str, s);
+      MATCH_AND_RETURN_CHECK;
+      s = prev;
+    } while (s >= range);
+  }
+
+ mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+  /* with FIND_LONGEST, report the best match recorded during the scan */
+  if (IS_FIND_LONGEST(reg->options)) {
+    if (msa.best_len >= 0) {
+      s = msa.best_s;
+      goto match;
+    }
+  }
+#endif
+  r = ONIG_MISMATCH;
+
+ finish:
+  MATCH_ARG_FREE(msa);
+  ONIG_STATE_DEC_THREAD(reg);
+
+  /* If result is mismatch and no FIND_NOT_EMPTY option,
+     then the region is not set in match_at(). */
+  if (IS_FIND_NOT_EMPTY(reg->options) && region
+#ifdef USE_POSIX_REGION_OPTION
+      && !IS_POSIX_REGION(option)
+#endif
+      ) {
+    onig_region_clear(region);
+  }
+
+#ifdef ONIG_DEBUG
+  if (r != ONIG_MISMATCH)
+    fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+  return r;
+
+  /* exits taken before msa was initialized: nothing to free */
+ mismatch_no_msa:
+  r = ONIG_MISMATCH;
+ finish_no_msa:
+  ONIG_STATE_DEC_THREAD(reg);
+#ifdef ONIG_DEBUG
+  if (r != ONIG_MISMATCH)
+    fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+  return r;
+
+ match:
+  ONIG_STATE_DEC_THREAD(reg);
+  MATCH_ARG_FREE(msa);
+  return s - str;
+}
+
+/* Returns the encoding stored in reg. */
+extern OnigEncoding
+onig_get_encoding(regex_t* reg)
+{
+  return reg->enc;
+}
+
+/* Returns the option flags stored in reg. */
+extern OnigOptionType
+onig_get_options(regex_t* reg)
+{
+  return reg->options;
+}
+
+/* Returns the ambiguity flag stored in reg. */
+extern OnigAmbigType
+onig_get_ambig_flag(regex_t* reg)
+{
+  return reg->ambig_flag;
+}
+
+/* Returns the syntax pointer stored in reg. */
+extern OnigSyntaxType*
+onig_get_syntax(regex_t* reg)
+{
+  return reg->syntax;
+}
+
+/* Returns reg->num_mem. */
+extern int
+onig_number_of_captures(regex_t* reg)
+{
+  return reg->num_mem;
+}
+
+/* Counts the bits set in reg->capture_history for indexes
+   0..ONIG_MAX_CAPTURE_HISTORY_GROUP; always 0 when USE_CAPTURE_HISTORY
+   is not compiled in. */
+extern int
+onig_number_of_capture_histories(regex_t* reg)
+{
+#ifdef USE_CAPTURE_HISTORY
+  int i, n;
+
+  n = 0;
+  for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+    if (BIT_STATUS_AT(reg->capture_history, i) != 0)
+      n++;
+  }
+  return n;
+#else
+  return 0;
+#endif
+}
+
+/* Copies the encoding object *from into *to (structure assignment). */
+extern void
+onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+{
+  *to = *from;
+}
+
diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c
new file mode 100644
index 0000000..f5ad1f3
--- /dev/null
+++ b/ext/mbstring/oniguruma/regext.c
@@ -0,0 +1,215 @@
+/**********************************************************************
+ regext.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include "regint.h" + +static void +conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = *s++; + } +} + +static void +conv_ext0le32(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = *s++; + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = '\0'; + } +} + +static void +conv_ext0be(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = '\0'; + *conv++ = *s++; + } +} + +static void +conv_ext0le(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = *s++; + *conv++ = '\0'; + } +} + +static void +conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = s[3]; + *conv++ = s[2]; + *conv++ = s[1]; + *conv++ = s[0]; + s += 4; + } +} + +static void +conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = s[1]; + *conv++ = s[0]; + s += 2; + } +} + +static int +conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, + UChar** conv, UChar** conv_end) +{ + int len = end - s; + + if (to == ONIG_ENCODING_UTF16_BE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 2); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + (len * 2); + conv_ext0be(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF16_LE) { + swap16: + *conv = (UChar* )xmalloc(len); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + len; + conv_swap2bytes(s, end, *conv); + return 0; + } + } + else if (to == ONIG_ENCODING_UTF16_LE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 2); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + (len * 2); + conv_ext0le(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF16_BE) { + goto 
swap16; + } + } + if (to == ONIG_ENCODING_UTF32_BE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 4); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + (len * 4); + conv_ext0be32(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF32_LE) { + swap32: + *conv = (UChar* )xmalloc(len); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + len; + conv_swap4bytes(s, end, *conv); + return 0; + } + } + else if (to == ONIG_ENCODING_UTF32_LE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 4); + CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY); + *conv_end = *conv + (len * 4); + conv_ext0le32(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF32_BE) { + goto swap32; + } + } + + return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; +} + +extern int +onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) +{ + int r; + UChar *cpat, *cpat_end; + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + if (ci->pattern_enc != ci->target_enc) { + r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, + &cpat, &cpat_end); + if (r) return r; + } + else { + cpat = (UChar* )pattern; + cpat_end = (UChar* )pattern_end; + } + + r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc, + ci->syntax); + if (r) goto err; + + r = onig_compile(*reg, cpat, cpat_end, einfo); + if (r) { + onig_free(*reg); + *reg = NULL; + } + + err: + if (cpat != pattern) xfree(cpat); + + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + 
onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c new file mode 100644 index 0000000..248957c --- /dev/null +++ b/ext/mbstring/oniguruma/reggnu.c @@ -0,0 +1,175 @@ +/********************************************************************** + reggnu.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#ifndef ONIGGNU_H +#include "oniggnu.h" +#endif + +extern void +re_free_registers(OnigRegion* r) +{ + /* 0: don't free self */ + onig_region_free(r, 0); +} + +extern int +re_adjust_startpos(regex_t* reg, const char* string, int size, + int startpos, int range) +{ + if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { + UChar *p; + UChar *s = (UChar* )string + startpos; + + if (range > 0) { + p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); + } + else { + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); + } + return p - (UChar* )string; + } + + return startpos; +} + +extern int +re_match(regex_t* reg, const char* str, int size, int pos, + struct re_registers* regs) +{ + return onig_match(reg, (UChar* )str, (UChar* )(str + size), + (UChar* )(str + pos), regs, ONIG_OPTION_NONE); +} + +extern int +re_search(regex_t* bufp, const char* string, int size, int startpos, int range, + struct re_registers* regs) +{ + return onig_search(bufp, (UChar* )string, (UChar* )(string + size), + (UChar* )(string + startpos), + (UChar* )(string + startpos + range), + regs, ONIG_OPTION_NONE); +} + +extern int +re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) +{ + int r; + OnigErrorInfo einfo; + + r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); + if (r != 0) { + if (IS_NOT_NULL(ebuf)) + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); + } + + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) +{ + int r; + OnigErrorInfo einfo; + OnigEncoding enc; + + /* I think encoding and options should be arguments of this function. + But this is adapted to present re.c. 
(2002/11/29) + */ + enc = OnigEncDefaultCharEncoding; + + r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), + reg->options, enc, OnigDefaultSyntax, &einfo); + if (r != 0) { + if (IS_NOT_NULL(ebuf)) + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); + } + return r; +} +#endif + +extern void +re_free_pattern(regex_t* reg) +{ + onig_free(reg); +} + +extern int +re_alloc_pattern(regex_t** reg) +{ + return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, + ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + OnigEncDefaultCharEncoding, + OnigDefaultSyntax); +} + +extern void +re_set_casetable(const char* table) +{ + onigenc_set_default_caseconv_table((UChar* )table); +} + +extern void +#ifdef ONIG_RUBY_M17N +re_mbcinit(OnigEncoding enc) +#else +re_mbcinit(int mb_code) +#endif +{ +#ifdef ONIG_RUBY_M17N + + onigenc_set_default_encoding(enc); + +#else + + OnigEncoding enc; + + switch (mb_code) { + case RE_MBCTYPE_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case RE_MBCTYPE_EUC: + enc = ONIG_ENCODING_EUC_JP; + break; + case RE_MBCTYPE_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case RE_MBCTYPE_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); +#endif +} diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h new file mode 100644 index 0000000..d6819d8 --- /dev/null +++ b/ext/mbstring/oniguruma/regint.h @@ -0,0 +1,830 @@ +#ifndef REGINT_H +#define REGINT_H +/********************************************************************** + regint.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* for debug */ +/* #define ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_COMPILE */ +/* #define ONIG_DEBUG_SEARCH */ +/* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DONT_OPTIMIZE */ + +/* for byte-code statistical data. */ +/* #define ONIG_DEBUG_STATISTICS */ + +#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ + defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ + defined(ONIG_DEBUG_STATISTICS) +#ifndef ONIG_DEBUG +#define ONIG_DEBUG +#endif +#endif + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + (defined(__ppc__) && defined(__APPLE__)) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(__mc68020__) +#define PLATFORM_UNALIGNED_WORD_ACCESS +#endif + +/* config */ +/* spec. 
config */ +/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ +#define USE_NAMED_GROUP +#define USE_SUBEXP_CALL +#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ +#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ +#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* treat \r\n as line terminator. + !!! NO SUPPORT !!! + use this configuration on your own responsibility */ +/* #define USE_CRNL_AS_LINE_TERMINATOR */ + +/* internal config */ +#define USE_RECYCLE_NODE +#define USE_OP_PUSH_OR_JUMP_EXACT +#define USE_QUANTIFIER_PEEK_NEXT +#define USE_ST_HASH_TABLE +#define USE_SHARED_CCLASS_TABLE + +#define INIT_MATCH_STACK_SIZE 160 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ + +/* interface to external system */ +#ifdef NOT_RUBY /* given from Makefile */ +#include "config.h" +#define USE_BACKREF_AT_LEVEL +#define USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ +#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ +/* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ +#define xmalloc malloc +#define xrealloc realloc +#define xcalloc calloc +#define xfree free +#else +#include "ruby.h" +#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ + +#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ +#define USE_MULTI_THREAD_SYSTEM +#define THREAD_SYSTEM_INIT +#define THREAD_SYSTEM_END +#define THREAD_ATOMIC_START DEFER_INTS +#define THREAD_ATOMIC_END ENABLE_INTS +#define THREAD_PASS rb_thread_schedule() + 
+#define DEFAULT_WARN_FUNCTION onig_rb_warn +#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning + +#endif /* else NOT_RUBY */ + +#define STATE_CHECK_STRING_THRESHOLD_LEN 7 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 + +#define THREAD_PASS_LIMIT_COUNT 8 +#define xmemset memset +#define xmemcpy memcpy +#define xmemmove memmove +#if defined(_WIN32) && !defined(__GNUC__) +#define xalloca _alloca +#if _MSC_VER < 1500 +#ifndef vsnprintf +#define vsnprintf _vsnprintf +#endif +#endif +#else +#define xalloca alloca +#endif + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) +#else +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + + +#define onig_st_is_member st_is_member + +#ifdef NOT_RUBY + +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_init_strend_table_with_size onig_st_init_strend_table_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_insert_strend onig_st_insert_strend +#define st_lookup onig_st_lookup +#define st_lookup_strend onig_st_lookup_strend +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_add_direct_strend 
onig_st_add_direct_strend +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free + +#else /* NOT_RUBY */ + +#define onig_st_init_table st_init_table +#define onig_st_init_table_with_size st_init_table_with_size +#define onig_st_init_numtable st_init_numtable +#define onig_st_init_numtable_with_size st_init_numtable_with_size +#define onig_st_init_strtable st_init_strtable +#define onig_st_init_strtable_with_size st_init_strtable_with_size +#define onig_st_init_strend_table_with_size st_init_strend_table_with_size +#define onig_st_delete st_delete +#define onig_st_delete_safe st_delete_safe +#define onig_st_insert st_insert +#define onig_st_insert_strend st_insert_strend +#define onig_st_lookup st_lookup +#define onig_st_lookup_strend st_lookup_strend +#define onig_st_foreach st_foreach +#define onig_st_add_direct st_add_direct +#define onig_st_add_direct_strend st_add_direct_strend +#define onig_st_free_table st_free_table +#define onig_st_cleanup_safe st_cleanup_safe +#define onig_st_copy st_copy +#define onig_st_nothing_key_clone st_nothing_key_clone +#define onig_st_nothing_key_free st_nothing_key_free + +#endif /* NOT_RUBY */ + + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) +#include <alloca.h> +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#include <ctype.h> +#ifdef HAVE_SYS_TYPES_H +#ifndef __BORLANDC__ +#include <sys/types.h> +#endif +#endif + +#ifdef __BORLANDC__ +#include <malloc.h> +#endif + +#ifdef ONIG_DEBUG +# include <stdio.h> +#endif + +#include "regenc.h" +#include "oniguruma.h" + +#ifdef MIN +#undef MIN +#endif +#ifdef MAX +#undef MAX +#endif +#define MIN(a,b) (((a)>(b))?(b):(a)) +#define MAX(a,b) (((a)<(b))?(b):(a)) + +#define IS_NULL(p) (((void*)(p)) == (void*)0) +#define 
IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL +#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val) +#define NULL_UCHARP ((UChar* )0) + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG + +#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ + (pad_size) = WORD_ALIGNMENT_SIZE \ + - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ + if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ +} while (0) + +#define ALIGNMENT_RIGHT(addr) do {\ + (addr) += (WORD_ALIGNMENT_SIZE - 1);\ + (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ +} while (0) + + +#define B_SHIFT 8 +#define B_MASK 0xff + +#define SERIALIZE_2BYTE_INT(i,p) do {\ + *(p) = ((i) >> B_SHIFT) & B_MASK;\ + *((p)+1) = (i) & B_MASK;\ +} while (0) + +#define SERIALIZE_4BYTE_INT(i,p) do {\ + *(p) = ((i) >> B_SHIFT*3) & B_MASK;\ + *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\ + *((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\ + *((p)+3) = (i) & B_MASK;\ +} while (0) + +#define SERIALIZE_8BYTE_INT(i,p) do {\ + *(p) = ((i) >> B_SHIFT*7) & B_MASK;\ + *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\ + *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\ + *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\ + *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\ + *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\ + *((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\ + *((p)+7) = (i) & B_MASK;\ +} while (0) + +#define GET_2BYTE_INT_INC(type,i,p) do {\ + (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\ + (p) += 2;\ +} while (0) + +#define GET_4BYTE_INT_INC(type,i,p) do {\ + (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \ + ((unsigned int )((p)[1]) << B_SHIFT*2) | \ + ((unsigned int )((p)[2]) << B_SHIFT ) | \ + ((unsigned int )((p)[3]) )); \ + (p) += 4;\ +} while (0) + +#define GET_8BYTE_INT_INC(type,i,p) do {\ + (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \ + ((unsigned long )((p)[1]) << B_SHIFT*6) | \ + ((unsigned 
long )((p)[2]) << B_SHIFT*5) | \ + ((unsigned long )((p)[3]) << B_SHIFT*4) | \ + ((unsigned long )((p)[4]) << B_SHIFT*3) | \ + ((unsigned long )((p)[5]) << B_SHIFT*2) | \ + ((unsigned long )((p)[6]) << B_SHIFT ) | \ + ((unsigned long )((p)[7]) )); \ + (p) += 8;\ +} while (0) + +#if SIZEOF_SHORT == 2 +#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p) +#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p) +#elif SIZEOF_SHORT == 4 +#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p) +#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p) +#elif SIZEOF_SHORT == 8 +#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p) +#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p) +#endif + +#if SIZEOF_INT == 2 +#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p) +#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p) +#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p) +#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p) +#elif SIZEOF_INT == 4 +#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p) +#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p) +#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p) +#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p) +#elif SIZEOF_INT == 8 +#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p) +#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p) +#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p) +#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p) +#endif + +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +/* stack pop level */ +#define STACK_POP_LEVEL_FREE 0 +#define STACK_POP_LEVEL_MEM_START 1 +#define STACK_POP_LEVEL_ALL 2 + +/* optimize flags */ +#define ONIG_OPTIMIZE_NONE 0 +#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ +#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ +#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ +#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ +#define ONIG_OPTIMIZE_MAP 5 /* char map */ + +/* bit status */ +typedef unsigned int 
BitStatusType; + +#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) +#define BIT_STATUS_CLEAR(stats) (stats) = 0 +#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) +#define BIT_STATUS_AT(stats,n) \ + ((n) < BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) + +#define BIT_STATUS_ON_AT(stats,n) do {\ + if ((n) < BIT_STATUS_BITS_NUM)\ + (stats) |= (1 << (n));\ + else\ + (stats) |= 1;\ +} while (0) + +#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ + if ((n) < BIT_STATUS_BITS_NUM)\ + (stats) |= (1 << (n));\ +} while (0) + + +#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) + +#define DIGITVAL(code) ((code) - '0') +#define ODIGITVAL(code) DIGITVAL(code) +#define XDIGITVAL(enc,code) \ + (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) + +#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) +#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) +#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) +#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) +#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) +#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) +#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option)) +#define IS_FIND_CONDITION(option) ((option) & \ + (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) +#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) +#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) +#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) + +/* OP_SET_OPTION is required for these options. +#define IS_DYNAMIC_OPTION(option) \ + (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) +*/ +/* ignore-case and multibyte status are included in compiled code. 
*/ +#define IS_DYNAMIC_OPTION(option) 0 + +#define REPEAT_INFINITE -1 +#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) + +/* bitset */ +#define BITS_PER_BYTE 8 +#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) +#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) +#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +typedef unsigned int Bits; +#else +typedef unsigned char Bits; +#endif +typedef Bits BitSet[BITSET_SIZE]; +typedef Bits* BitSetRef; + +#define SIZE_BITSET sizeof(BitSet) + +#define BITSET_CLEAR(bs) do {\ + int i;\ + for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\ +} while (0) + +#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] +#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) + +#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) +#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) +#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) +#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) + +/* bytes buffer */ +typedef struct _BBuf { + UChar* p; + unsigned int used; + unsigned int alloc; +} BBuf; + +#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) + +#define BBUF_SIZE_INC(buf,inc) do{\ + (buf)->alloc += (inc);\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_EXPAND(buf,low) do{\ + do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_ENSURE_SIZE(buf,size) do{\ + unsigned int new_alloc = (buf)->alloc;\ + while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ + if ((buf)->alloc != new_alloc) {\ + (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ + (buf)->alloc = new_alloc;\ + }\ +} while (0) + +#define BBUF_WRITE(buf,pos,bytes,n) do{\ + int used = 
(pos) + (n);\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_WRITE1(buf,pos,byte) do{\ + int used = (pos) + 1;\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + (buf)->p[(pos)] = (byte);\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) +#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) +#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) +#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) + +/* from < to */ +#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ + if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ + xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ + if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ + (buf)->used -= (from - to);\ +} while (0) + +#define BBUF_INSERT(buf,pos,bytes,n) do {\ + if (pos >= (buf)->used) {\ + BBUF_WRITE(buf,pos,bytes,n);\ + }\ + else {\ + BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + }\ +} while (0) + +#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] + + +#define ANCHOR_BEGIN_BUF (1<<0) +#define ANCHOR_BEGIN_LINE (1<<1) +#define ANCHOR_BEGIN_POSITION (1<<2) +#define ANCHOR_END_BUF (1<<3) +#define ANCHOR_SEMI_END_BUF (1<<4) +#define ANCHOR_END_LINE (1<<5) + +#define ANCHOR_WORD_BOUND (1<<6) +#define ANCHOR_NOT_WORD_BOUND (1<<7) +#define ANCHOR_WORD_BEGIN (1<<8) +#define ANCHOR_WORD_END (1<<9) +#define ANCHOR_PREC_READ (1<<10) +#define 
ANCHOR_PREC_READ_NOT (1<<11) +#define ANCHOR_LOOK_BEHIND (1<<12) +#define ANCHOR_LOOK_BEHIND_NOT (1<<13) + +#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ +#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ + +/* operation code */ +enum OpCode { + OP_FINISH = 0, /* matching process terminator (no more alternative) */ + OP_END = 1, /* pattern code terminator (success end) */ + + OP_EXACT1 = 2, /* single byte, N = 1 */ + OP_EXACT2, /* single byte, N = 2 */ + OP_EXACT3, /* single byte, N = 3 */ + OP_EXACT4, /* single byte, N = 4 */ + OP_EXACT5, /* single byte, N = 5 */ + OP_EXACTN, /* single byte */ + OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ + OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ + OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ + OP_EXACTMB2N, /* mb-length = 2 */ + OP_EXACTMB3N, /* mb-length = 3 */ + OP_EXACTMBN, /* other length */ + + OP_EXACT1_IC, /* single byte, N = 1, ignore case */ + OP_EXACTN_IC, /* single byte, ignore case */ + + OP_CCLASS, + OP_CCLASS_MB, + OP_CCLASS_MIX, + OP_CCLASS_NOT, + OP_CCLASS_MB_NOT, + OP_CCLASS_MIX_NOT, + OP_CCLASS_NODE, /* pointer to CClassNode node */ + + OP_ANYCHAR, /* "." */ + OP_ANYCHAR_ML, /* "." 
multi-line */ + OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ + OP_ANYCHAR_STAR_PEEK_NEXT, + OP_ANYCHAR_ML_STAR_PEEK_NEXT, + + OP_WORD, + OP_NOT_WORD, + OP_WORD_BOUND, + OP_NOT_WORD_BOUND, + OP_WORD_BEGIN, + OP_WORD_END, + + OP_BEGIN_BUF, + OP_END_BUF, + OP_BEGIN_LINE, + OP_END_LINE, + OP_SEMI_END_BUF, + OP_BEGIN_POSITION, + + OP_BACKREF1, + OP_BACKREF2, + OP_BACKREFN, + OP_BACKREFN_IC, + OP_BACKREF_MULTI, + OP_BACKREF_MULTI_IC, + OP_BACKREF_AT_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ + + OP_MEMORY_START, + OP_MEMORY_START_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ + OP_MEMORY_END, + OP_MEMORY_END_REC, /* push marker to stack */ + + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION, /* set option */ + + OP_FAIL, /* pop stack and move */ + OP_JUMP, + OP_PUSH, + OP_POP, + OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ + OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ + OP_REPEAT, /* {n,m} */ + OP_REPEAT_NG, /* {n,m}? (non greedy) */ + OP_REPEAT_INC, + OP_REPEAT_INC_NG, /* non greedy */ + OP_REPEAT_INC_SG, /* search and get in stack */ + OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ + OP_NULL_CHECK_START, /* null loop checker start */ + OP_NULL_CHECK_END, /* null loop checker end */ + OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ + + OP_PUSH_POS, /* (?=...) start */ + OP_POP_POS, /* (?=...) end */ + OP_PUSH_POS_NOT, /* (?!...) start */ + OP_FAIL_POS, /* (?!...) end */ + OP_PUSH_STOP_BT, /* (?>...) start */ + OP_POP_STOP_BT, /* (?>...) end */ + OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ + OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ + OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) 
end */ + + OP_CALL, /* \g<name> */ + OP_RETURN, + + OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + OP_STATE_CHECK, /* check only */ + OP_STATE_CHECK_ANYCHAR_STAR, + OP_STATE_CHECK_ANYCHAR_ML_STAR +}; + +typedef int RelAddrType; +typedef int AbsAddrType; +typedef int LengthType; +typedef int RepeatNumType; +typedef short int MemNumType; +typedef short int StateCheckNumType; +typedef void* PointerType; + +#define SIZE_OPCODE 1 +#define SIZE_RELADDR sizeof(RelAddrType) +#define SIZE_ABSADDR sizeof(AbsAddrType) +#define SIZE_LENGTH sizeof(LengthType) +#define SIZE_MEMNUM sizeof(MemNumType) +#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) +#define SIZE_REPEATNUM sizeof(RepeatNumType) +#define SIZE_OPTION sizeof(OnigOptionType) +#define SIZE_CODE_POINT sizeof(OnigCodePoint) +#define SIZE_POINTER sizeof(PointerType) + + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + +#define PLATFORM_GET_INC(val,p,type) do{\ + val = *(type* )p;\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&val, (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) +#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) +#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) +#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) +#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) +#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) +#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) +#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) + +/* code point's address must be aligned address. 
*/ +#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) +#define GET_BYTE_INC(byte,p) do{\ + byte = *(p);\ + (p)++;\ +} while(0) + + +/* op-code + arg size */ +#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE +#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) +#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_POS SIZE_OPCODE +#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP_POS SIZE_OPCODE +#define SIZE_OP_FAIL_POS SIZE_OPCODE +#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_FAIL SIZE_OPCODE +#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE +#define SIZE_OP_POP_STOP_BT SIZE_OPCODE +#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) +#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) +#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE +#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) +#define SIZE_OP_RETURN SIZE_OPCODE + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#endif + +#define MC_ESC(enc) (enc)->meta_char_table.esc +#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar +#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime +#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time +#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time +#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime + +#define IS_MC_ESC_CODE(code, enc, syn) \ + ((code) == MC_ESC(enc) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + +/* cclass node */ +#define FLAG_CCLASS_NOT 1 +#define FLAG_CCLASS_SHARE (1<<1) + +#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT +#define 
CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT +#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE +#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0) +#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0) + +typedef struct { + int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + + +#ifdef ONIG_DEBUG + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OnigOpInfoType; + +extern OnigOpInfoType OnigOpInfo[]; + +extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); + +#ifdef ONIG_DEBUG_STATISTICS +extern void onig_statistics_init P_((void)); +extern void onig_print_statistics P_((FILE* f)); +#endif +#endif + +extern UChar* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); +extern int onig_bbuf_init P_((BBuf* buf, int size)); +extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); +extern void onig_chain_reduce P_((regex_t* reg)); +extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); +extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); + +#endif /* REGINT_H */ diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c new file mode 100644 index 0000000..abf2cc1 --- /dev/null +++ b/ext/mbstring/oniguruma/regparse.c @@ -0,0 +1,5290 @@ +/********************************************************************** + regparse.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 
K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regparse.h" + +#define WARN_BUFSIZE 256 + +OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE +}; + +OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; + +extern void onig_null_warn(const char* s) { } + +#ifdef RUBY_PLATFORM +extern void +onig_rb_warn(const char* s) +{ + rb_warn("%s", s); +} + +extern void +onig_rb_warning(const char* s) +{ + rb_warning("%s", s); +} +#endif + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; +#endif + +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + +static void +bbuf_free(BBuf* bbuf) +{ + if (IS_NOT_NULL(bbuf)) { + if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); + xfree(bbuf); + } +} + +static 
int +bbuf_clone(BBuf** rto, BBuf* from) +{ + int r; + BBuf *to; + + *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); + r = BBUF_INIT(to, from->alloc); + if (r != 0) return r; + to->used = from->used; + xmemcpy(to->p, from->p, from->used); + return 0; +} + +#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) + +#define MBCODE_START_POS(enc) \ + (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) + +#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ + add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + +#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ + if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ + r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ + if (r) return r;\ + }\ +} while (0) + + +#define BITSET_IS_EMPTY(bs,empty) do {\ + int i;\ + empty = 1;\ + for (i = 0; i < BITSET_SIZE; i++) {\ + if ((bs)[i] != 0) {\ + empty = 0; break;\ + }\ + }\ +} while (0) + +static void +bitset_set_range(BitSetRef bs, int from, int to) +{ + int i; + for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { + BITSET_SET_BIT(bs, i); + } +} + +#if 0 +static void +bitset_set_all(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + bs[i] = ~((Bits )0); + } +} +#endif + +static void +bitset_invert(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + bs[i] = ~(bs[i]); + } +} + +static void +bitset_invert_to(BitSetRef from, BitSetRef to) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + to[i] = ~(from[i]); + } +} + +static void +bitset_and(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] &= bs[i]; + } +} + +static void +bitset_or(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] |= bs[i]; + } +} + +static void +bitset_copy(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] = bs[i]; + } +} + +extern int +onig_strncmp(const UChar* s1, const UChar* s2, int n) +{ + int x; + + while 
(n-- > 0) { + x = *s2++ - *s1++; + if (x) return x; + } + return 0; +} + +static void +k_strcpy(UChar* dest, const UChar* src, const UChar* end) +{ + int len = end - src; + if (len > 0) { + xmemcpy(dest, src, len); + dest[len] = (UChar )0; + } +} + +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = end - s; + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} + + +/* scan pattern methods */ +#define PEND_VALUE 0 + +#define PFETCH_READY UChar* pfetch_prev +#define PEND (p < end ? 0 : 1) +#define PUNFETCH p = pfetch_prev +#define PINC do { \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) +#define PFETCH(c) do { \ + c = ONIGENC_MBC_TO_CODE(enc, p, end); \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) + +#define PPEEK (p < end ? 
ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) +#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) + +static UChar* +k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, + int capa) +{ + UChar* r; + + if (dest) + r = (UChar* )xrealloc(dest, capa + 1); + else + r = (UChar* )xmalloc(capa + 1); + + CHECK_NULL_RETURN(r); + k_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +/* dest on static area */ +static UChar* +strcat_capa_from_static(UChar* dest, UChar* dest_end, + const UChar* src, const UChar* src_end, int capa) +{ + UChar* r; + + r = (UChar* )xmalloc(capa + 1); + CHECK_NULL_RETURN(r); + k_strcpy(r, dest, dest_end); + k_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_HASH_TABLE + +#include "st.h" + +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); + +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, +}; + +static st_table* +onig_st_init_strend_table_with_size(int size) +{ + return onig_st_init_table_with_size(&type_strend_hash, size); +} + +static int +onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) +{ + st_strend_key* key; + int result; + + key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + 
result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +static int +strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + +typedef st_table NameTable; +typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ + +#define NAMEBUF_SIZE 24 +#define NAMEBUF_SIZE_1 25 + +#ifdef ONIG_DEBUG +static int +i_print_name_entry(UChar* key, NameEntry* e, void* arg) +{ + int i; + FILE* fp = (FILE* )arg; + + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) + fputs("-", fp); + else if (e->back_num == 1) + fprintf(fp, "%d", e->back_ref1); + else { + for (i = 0; i < e->back_num; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[i]); + } + } + fputs("\n", fp); + return ST_CONTINUE; +} + +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + fprintf(fp, "name table\n"); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); + fputs("\n", fp); + } + return 0; +} +#endif + +static int +i_free_name_entry(UChar* key, NameEntry* e, void* arg) +{ + xfree(e->name); + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); + return ST_DELETE; +} + +static int +names_clear(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_name_entry, 0); + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if 
(IS_NOT_NULL(t)) onig_st_free_table(t); + reg->name_table = (void* )NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, const UChar* name, const UChar* name_end) +{ + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + e = (NameEntry* )NULL; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +typedef struct { + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; + int ret; + OnigEncoding enc; +} INamesArg; + +static int +i_names(UChar* key, NameEntry* e, INamesArg* arg) +{ + int r = (*(arg->func))(e->name, + /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */ + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); + if (r != 0) { + arg->ret = r; + return ST_STOP; + } + return ST_CONTINUE; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) +{ + INamesArg narg; + NameTable* t = (NameTable* )reg->name_table; + + narg.ret = 0; + if (IS_NOT_NULL(t)) { + narg.func = func; + narg.reg = reg; + narg.arg = arg; + narg.enc = reg->enc; /* should be pattern encoding. 
*/ + onig_st_foreach(t, i_names, (HashDataType )&narg); + } + return narg.ret; +} + +static int +i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + +#else /* USE_ST_HASH_TABLE */ + +#define INIT_NAMES_ALLOC_NUM 8 + +typedef struct { + NameEntry* e; + int num; + int alloc; +} NameTable; + + +#ifdef ONIG_DEBUG +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + int i, j; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t) && t->num > 0) { + fprintf(fp, "name table\n"); + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) { + fputs("-", fp); + } + else if (e->back_num == 1) { + fprintf(fp, "%d", e->back_ref1); + } + else { + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } + } + fputs("\n", fp); + } + fputs("\n", fp); + } + return 0; +} +#endif + +static int +names_clear(regex_t* reg) +{ + int i; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + e->back_refs = (int* )NULL; + } + } + if (IS_NOT_NULL(t->e)) { 
+      xfree(t->e);
+      t->e = NULL;
+    }
+    t->num = 0;
+  }
+  return 0;
+}
+/* Clear the name table via names_clear(), then free the table object
+ * itself and set reg->name_table to NULL.  Returns 0, or the non-zero
+ * result of names_clear().
+ */
+extern int
+onig_names_free(regex_t* reg)
+{
+  int r;
+  NameTable* t;
+
+  r = names_clear(reg);
+  if (r) return r;
+
+  t = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(t)) xfree(t);
+  reg->name_table = NULL;
+  return 0;
+}
+/* Linear search of the array name table for the name [name, name_end).
+ * Lengths are compared first, then the bytes with onig_strncmp().
+ * Returns the matching entry or NULL.
+ */
+static NameEntry*
+name_find(regex_t* reg, UChar* name, UChar* name_end)
+{
+  int i, len;
+  NameEntry* e;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NOT_NULL(t)) {
+    len = name_end - name;
+    for (i = 0; i < t->num; i++) {
+      e = &(t->e[i]);
+      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
+        return e;
+    }
+  }
+  return (NameEntry* )NULL;
+}
+/* Call func(name, name_end, back_num, group_numbers, reg, arg) for each
+ * entry of the array name table, stopping at the first non-zero result.
+ */
+extern int
+onig_foreach_name(regex_t* reg,
+           int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+           void* arg)
+{
+  int i, r;
+  NameEntry* e;
+  NameTable* t = (NameTable* )reg->name_table;
+
+  if (IS_NOT_NULL(t)) {
+    for (i = 0; i < t->num; i++) {
+      e = &(t->e[i]);
+      r = (*func)(e->name, e->name + e->name_len, e->back_num,
+                  (e->back_num > 1 ?
e->back_refs : &(e->back_ref1)), + reg, arg); + if (r != 0) return r; + } + } + return 0; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + +#endif /* else USE_ST_HASH_TABLE */ + +static int +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) +{ + int alloc; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (name_end - name <= 0) + return ONIGERR_EMPTY_GROUP_NAME; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_HASH_TABLE + if (IS_NULL(t)) { + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; + } + e = (NameEntry* )xmalloc(sizeof(NameEntry)); + CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); + + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + + e->name_len = name_end - name; + e->back_num = 0; + e->back_alloc = 0; + e->back_refs = (int* )NULL; + +#else + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (NameTable* )xmalloc(sizeof(NameTable)); + CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + reg->name_table = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = strdup_with_null(reg->enc, name, name_end); + e->name_len = 
name_end - name; +#endif + } + + if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + + e->back_num++; + if (e->back_num == 1) { + e->back_ref1 = backref; + } + else { + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; + } + } + + return 0; +} + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + NameEntry* e; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; + + switch (e->back_num) { + case 0: + break; + case 1: + *nums = &(e->back_ref1); + break; + default: + *nums = e->back_refs; + break; + } + return e->back_num; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int 
+onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion* region) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ + +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + + +#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + int i; + + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); + BIT_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif + env->mem_alloc = 0; + env->mem_nodes_dynamic = (Node** )NULL; + + for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) + env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + Node** p; + + need = env->num_mem + 1; + if (need >= SCANENV_MEMNODES_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_nodes_dynamic)) { + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); + } + else { + alloc = env->mem_alloc * 2; + p = 
(Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + } + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + + for (i = env->num_mem + 1; i < alloc; i++) + p[i] = NULL_NODE; + + env->mem_nodes_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEM_NODES(env)[num] = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + + +#ifdef USE_RECYCLE_NODE +typedef struct _FreeNode { + struct _FreeNode* next; +} FreeNode; + +static FreeNode* FreeNodeList = (FreeNode* )NULL; +#endif + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + + switch (NTYPE(node)) { + case N_STRING: + if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { + xfree(NSTRING(node).s); + } + break; + + case N_LIST: + case N_ALT: + onig_node_free(NCONS(node).left); + /* onig_node_free(NCONS(node).right); */ + { + Node* next_node = NCONS(node).right; + +#ifdef USE_RECYCLE_NODE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif + + node = next_node; + goto start; + } + break; + + case N_CCLASS: + { + CClassNode* cc = &(NCCLASS(node)); + + if (IS_CCLASS_SHARE(cc)) + return ; + + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case N_QUANTIFIER: + if (NQUANTIFIER(node).target) + onig_node_free(NQUANTIFIER(node).target); + break; + + case N_EFFECT: + if (NEFFECT(node).target) + onig_node_free(NEFFECT(node).target); + break; + + case N_BACKREF: + if (IS_NOT_NULL(NBACKREF(node).back_dynamic)) + xfree(NBACKREF(node).back_dynamic); + break; + + case N_ANCHOR: + if (NANCHOR(node).target) + onig_node_free(NANCHOR(node).target); + break; + } + +#ifdef USE_RECYCLE_NODE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + 
THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif +} + +#ifdef USE_RECYCLE_NODE +extern int +onig_free_node_list(void) +{ + FreeNode* n; + + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { + n = FreeNodeList; + FreeNodeList = FreeNodeList->next; + xfree(n); + } + /* THREAD_ATOMIC_END; */ + return 0; +} +#endif + +static Node* +node_new(void) +{ + Node* node; + +#ifdef USE_RECYCLE_NODE + THREAD_ATOMIC_START; + if (IS_NOT_NULL(FreeNodeList)) { + node = (Node* )FreeNodeList; + FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; + return node; + } + THREAD_ATOMIC_END; +#endif + + node = (Node* )xmalloc(sizeof(Node)); + return node; +} + + +static void +initialize_cclass(CClassNode* cc) +{ + BITSET_CLEAR(cc->bs); + cc->flags = 0; + cc->mbuf = NULL; +} + +static Node* +node_new_cclass(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CCLASS; + + initialize_cclass(&(NCCLASS(node))); + return node; +} + +static Node* +node_new_cclass_by_codepoint_range(int not, + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) +{ + CClassNode* cc; + int n, i, j; + + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CCLASS; + + cc = &(NCCLASS(node)); + cc->flags = 0; + if (not != 0) CCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (IS_NOT_NULL(sbr)) { + n = ONIGENC_CODE_RANGE_NUM(sbr); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + } + + if (IS_NULL(mbr)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(mbr); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(bbuf, NULL); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )mbr); + + cc->mbuf = bbuf; + } + + return node; +} + +static Node* +node_new_ctype(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CTYPE; 
+  NCTYPE(node).type = type;
+  return node;
+}
+/* Create an N_ANYCHAR node, or NULL on allocation failure. */
+static Node*
+node_new_anychar(void)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_ANYCHAR;
+  return node;
+}
+/* Create an N_LIST cons cell holding `left` and `right`. */
+static Node*
+node_new_list(Node* left, Node* right)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_LIST;
+  NCONS(node).left  = left;
+  NCONS(node).right = right;
+  return node;
+}
+/* Exported wrapper around node_new_list(). */
+extern Node*
+onig_node_new_list(Node* left, Node* right)
+{
+  return node_new_list(left, right);
+}
+/* Create an N_ALT cons cell holding `left` and `right`. */
+static Node*
+node_new_alt(Node* left, Node* right)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_ALT;
+  NCONS(node).left  = left;
+  NCONS(node).right = right;
+  return node;
+}
+/* Create an N_ANCHOR node of the given anchor type with no target and
+ * char_len set to -1.
+ */
+extern Node*
+onig_node_new_anchor(int type)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_ANCHOR;
+  NANCHOR(node).type     = type;
+  NANCHOR(node).target   = NULL;
+  NANCHOR(node).char_len = -1;
+  return node;
+}
+/* Create an N_BACKREF node referring to the `back_num` group numbers in
+ * `backrefs`.  NST_RECURSION is set when a referenced group number is
+ * within env->num_mem but has no node recorded yet.  Up to
+ * NODE_BACKREFS_SIZE numbers are stored inline, more in a heap array.
+ * Returns NULL on allocation failure.
+ */
+static Node*
+node_new_backref(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+                 int exist_level, int nest_level,
+#endif
+                 ScanEnv* env)
+{
+  int i;
+  Node* node = node_new();
+
+  CHECK_NULL_RETURN(node);
+  node->type = N_BACKREF;
+  NBACKREF(node).state    = 0;
+  NBACKREF(node).back_num = back_num;
+  NBACKREF(node).back_dynamic = (int* )NULL;
+  if (by_name != 0)
+    NBACKREF(node).state |= NST_NAME_REF;
+
+#ifdef USE_BACKREF_AT_LEVEL
+  if (exist_level != 0) {
+    NBACKREF(node).state |= NST_NEST_LEVEL;
+    NBACKREF(node).nest_level  = nest_level;
+  }
+#endif
+
+  for (i = 0; i < back_num; i++) {
+    if (backrefs[i] <= env->num_mem &&
+        IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
+      NBACKREF(node).state |= NST_RECURSION;   /* /...(\1).../ */
+      break;
+    }
+  }
+
+  if (back_num <= NODE_BACKREFS_SIZE) {
+    for (i = 0; i < back_num; i++)
+      NBACKREF(node).back_static[i] = backrefs[i];
+  }
+  else {
+    int* p = (int* )xmalloc(sizeof(int) * back_num);
+    if (IS_NULL(p)) {
+      onig_node_free(node);
+      return NULL;
+    }
+
    NBACKREF(node).back_dynamic = p;
+    for (i = 0; i < back_num; i++)
+      p[i] = backrefs[i];
+  }
+  return node;
+}
+/* Create an N_CALL node for the name [name, name_end); the target is
+ * left unset and ref_num is CALLNODE_REFNUM_UNDEF.  The name bytes are
+ * referenced, not copied.
+ */
+#ifdef USE_SUBEXP_CALL
+static Node*
+node_new_call(UChar* name, UChar* name_end)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+
+  node->type = N_CALL;
+  NCALL(node).state    = 0;
+  NCALL(node).ref_num  = CALLNODE_REFNUM_UNDEF;
+  NCALL(node).target   = NULL_NODE;
+  NCALL(node).name     = name;
+  NCALL(node).name_end = name_end;
+  return node;
+}
+#endif
+/* Create a greedy N_QUANTIFIER node with bounds [lower, upper] and no
+ * target.  A non-zero `by_number` sets NST_BY_NUMBER.
+ */
+static Node*
+node_new_quantifier(int lower, int upper, int by_number)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_QUANTIFIER;
+  NQUANTIFIER(node).state  = 0;
+  NQUANTIFIER(node).target = NULL;
+  NQUANTIFIER(node).lower  = lower;
+  NQUANTIFIER(node).upper  = upper;
+  NQUANTIFIER(node).greedy = 1;
+  NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+  NQUANTIFIER(node).head_exact        = NULL_NODE;
+  NQUANTIFIER(node).next_head_exact   = NULL_NODE;
+  NQUANTIFIER(node).is_refered        = 0;
+  if (by_number != 0)
+    NQUANTIFIER(node).state |= NST_BY_NUMBER;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  NQUANTIFIER(node).comb_exp_check_num = 0;
+#endif
+
+  return node;
+}
+/* Create an N_EFFECT node of the given effect type with all other
+ * fields reset (call_addr is -1).
+ */
+static Node*
+node_new_effect(int type)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_EFFECT;
+  NEFFECT(node).type      = type;
+  NEFFECT(node).state     =  0;
+  NEFFECT(node).regnum    =  0;
+  NEFFECT(node).option    =  0;
+  NEFFECT(node).target    = NULL;
+  NEFFECT(node).call_addr = -1;
+  NEFFECT(node).opt_count =  0;
+  return node;
+}
+/* Exported wrapper around node_new_effect(). */
+extern Node*
+onig_node_new_effect(int type)
+{
+  return node_new_effect(type);
+}
+/* Create an EFFECT_MEMORY node; NST_NAMED_GROUP is set for a named
+ * group.  `option` is stored only when USE_SUBEXP_CALL is defined.
+ */
+static Node*
+node_new_effect_memory(OnigOptionType option, int is_named)
+{
+  Node* node = node_new_effect(EFFECT_MEMORY);
+  CHECK_NULL_RETURN(node);
+  if (is_named != 0)
+    SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
+
+#ifdef USE_SUBEXP_CALL
+  NEFFECT(node).option = option;
+#endif
+  return node;
+}
+/* Create an EFFECT_OPTION node carrying `option`. */
+static Node*
+node_new_option(OnigOptionType option)
+{
+  Node* node =
node_new_effect(EFFECT_OPTION);
+  CHECK_NULL_RETURN(node);
+  NEFFECT(node).option = option;
+  return node;
+}
+/* Append the bytes [s, end) to a string node.
+ *
+ * The inline buffer is used while capa is 0 and the result stays within
+ * NODE_STR_BUF_SIZE - 1 bytes.  Otherwise the text lives in a heap buffer,
+ * which is (re)allocated with NODE_STR_MARGIN spare bytes when its
+ * recorded capacity is too small.
+ * Returns 0, or ONIGERR_MEMORY when the buffer cannot be grown.
+ */
+extern int
+onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
+{
+  int addlen = end - s;
+
+  if (addlen > 0) {
+    int len  = NSTRING(node).end - NSTRING(node).s;
+
+    if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+      UChar* p;
+      int capa = len + addlen + NODE_STR_MARGIN;
+
+      if (capa <= NSTRING(node).capa) {
+        k_strcpy(NSTRING(node).s + len, s, end);
+      }
+      else {
+        if (NSTRING(node).s == NSTRING(node).buf)
+          p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
+                                      s, end, capa);
+        else
+          p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+
+        CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+        NSTRING(node).s    = p;
+        NSTRING(node).capa = capa;
+      }
+    }
+    else {
+      k_strcpy(NSTRING(node).s + len, s, end);
+    }
+    NSTRING(node).end = NSTRING(node).s + len + addlen;
+  }
+
+  return 0;
+}
+/* Append the single byte `c` via onig_node_str_cat(). */
+static int
+node_str_cat_char(Node* node, UChar c)
+{
+  UChar s[1];
+
+  s[0] = c;
+  return onig_node_str_cat(node, s, s + 1);
+}
+/* Turn `node` into an empty N_STRING node that uses its inline buffer.
+ * Nothing previously owned by the node is released here.
+ */
+extern void
+onig_node_conv_to_str_node(Node* node, int flag)
+{
+  node->type = N_STRING;
+
+  NSTRING(node).flag = flag;
+  NSTRING(node).capa = 0;
+  NSTRING(node).s    = NSTRING(node).buf;
+  NSTRING(node).end  = NSTRING(node).buf;
+}
+/* Empty a string node: free its heap buffer if it has one and point it
+ * back at the inline buffer with flag and capa cleared.
+ */
+extern void
+onig_node_str_clear(Node* node)
+{
+  if (NSTRING(node).capa != 0 &&
+      IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+    xfree(NSTRING(node).s);
+  }
+
+  NSTRING(node).capa = 0;
+  NSTRING(node).flag = 0;
+  NSTRING(node).s    = NSTRING(node).buf;
+  NSTRING(node).end  = NSTRING(node).buf;
+}
+/* Create an N_STRING node holding a copy of [s, end).  Returns NULL when
+ * the node or its text cannot be allocated.
+ */
+static Node*
+node_new_str(const UChar* s, const UChar* end)
+{
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+
+  node->type = N_STRING;
+  NSTRING(node).capa = 0;
+  NSTRING(node).flag = 0;
+  NSTRING(node).s    = NSTRING(node).buf;
+  NSTRING(node).end  = NSTRING(node).buf;
+  if (onig_node_str_cat(node, s, end)) {
+    onig_node_free(node);
+    return
NULL;
+  }
+  return node;
+}
+
+/* Exported wrapper around node_new_str(). */
+extern Node*
+onig_node_new_str(const UChar* s, const UChar* end)
+{
+  return node_new_str(s, end);
+}
+
+/* Create a string node flagged as raw (NSTRING_SET_RAW).  Returns NULL
+ * when node_new_str() fails; the flag is only set on a valid node.
+ */
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+static Node*
+node_new_str_raw(UChar* s, UChar* end)
+{
+  Node* node = node_new_str(s, end);
+  if (IS_NOT_NULL(node))
+    NSTRING_SET_RAW(node);
+  return node;
+}
+#endif
+
+/* Create an empty string node. */
+static Node*
+node_new_empty(void)
+{
+  return node_new_str(NULL, NULL);
+}
+
+/* Create a string node holding the single byte `c`. */
+static Node*
+node_new_str_char(UChar c)
+{
+  UChar p[1];
+
+  p[0] = c;
+  return node_new_str(p, p + 1);
+}
+
+/* Detach the last character of `sn` into a new string node.
+ *
+ * Only done when the string holds more than one character; `sn` is then
+ * shortened and the new node inherits the NSTR_RAW flag.  Returns
+ * NULL_NODE when the string cannot be split or when the new node cannot
+ * be allocated; `sn` is left untouched in both cases.
+ */
+static Node*
+str_node_split_last_char(StrNode* sn, OnigEncoding enc)
+{
+  const UChar *p;
+  Node* n = NULL_NODE;
+
+  if (sn->end > sn->s) {
+    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
+    if (p && p > sn->s) { /* can be splitted. */
+      n = node_new_str(p, sn->end);
+      if (IS_NOT_NULL(n)) {
+        if ((sn->flag & NSTR_RAW) != 0)
+          NSTRING_SET_RAW(n);
+        /* truncate only after the tail was copied successfully */
+        sn->end = (UChar* )p;
+      }
+    }
+  }
+  return n;
+}
+
+/* Return 1 when `sn` holds more bytes than its first character, else 0. */
+static int
+str_node_can_be_split(StrNode* sn, OnigEncoding enc)
+{
+  if (sn->end > sn->s) {
+    return ((enc_len(enc, sn->s) < sn->end - sn->s) ?
1 : 0); + } + return 0; +} + +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR +static int +node_str_head_pad(StrNode* sn, int num, UChar val) +{ + UChar buf[NODE_STR_BUF_SIZE]; + int i, len; + + len = sn->end - sn->s; + onig_strcpy(buf, sn->s, sn->end); + onig_strcpy(&(sn->s[num]), buf, buf + len); + sn->end += num; + + for (i = 0; i < num; i++) { + sn->s[i] = val; + } +} +#endif + +extern int +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +{ + unsigned int num, val; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + val = (unsigned int )DIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 10UL < num) + return -1; /* overflow */ + + num = num * 10 + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); + if ((INT_MAX_LIMIT - val) / 16UL < num) + return -1; /* overflow */ + + num = (num << 4) + XDIGITVAL(enc,c); + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + val = ODIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 8UL < num) + return -1; /* overflow */ + + num = (num << 3) + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + + +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) + +/* data format: + [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n]
+     (all data size is OnigCodePoint)
+ */
+/* Allocate an empty code range buffer (count n = 0) and store it in
+ * *pbuf.  Returns 0 on success.  On failure *pbuf is NULL and nothing is
+ * leaked: a BBuf whose storage could not be initialised is released
+ * (this assumes BBUF_INIT owns nothing when it fails -- it is a macro
+ * defined elsewhere; confirm).
+ */
+static int
+new_code_range(BBuf** pbuf)
+{
+#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
+  int r;
+  OnigCodePoint n;
+  BBuf* bbuf;
+
+  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
+  r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+  if (r) {
+    xfree(bbuf);
+    *pbuf = (BBuf* )NULL;
+    return r;
+  }
+
+  n = 0;
+  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+  return 0;
+}
+
+/* Insert the range [from, to] into the sorted range list in *pbuf,
+ * creating the buffer when *pbuf is NULL.  Ranges that overlap or touch
+ * the new one are merged into it.  The bounds are swapped when given in
+ * the wrong order.  Returns 0, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, or an
+ * error from the buffer macros.
+ */
+static int
+add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
+{
+  int r, inc_n, pos;
+  int low, high, bound, x;
+  OnigCodePoint n, *data;
+  BBuf* bbuf;
+
+  if (from > to) {
+    n = from; from = to; to = n;
+  }
+
+  if (IS_NULL(*pbuf)) {
+    r = new_code_range(pbuf);
+    if (r) return r;
+    bbuf = *pbuf;
+    n = 0;
+  }
+  else {
+    bbuf = *pbuf;
+    GET_CODE_POINT(n, bbuf->p);
+  }
+  data = (OnigCodePoint* )(bbuf->p);
+  data++;
+
+  /* low: first range whose upper bound is not below `from` */
+  for (low = 0, bound = n; low < bound; ) {
+    x = (low + bound) >> 1;
+    if (from > data[x*2 + 1])
+      low = x + 1;
+    else
+      bound = x;
+  }
+
+  /* high: one past the last range starting at or before to + 1 */
+  for (high = low, bound = n; high < bound; ) {
+    x = (high + bound) >> 1;
+    if (to >= data[x*2] - 1)
+      high = x + 1;
+    else
+      bound = x;
+  }
+
+  inc_n = low + 1 - high;
+  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
+    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
+
+  if (inc_n != 1) {
+    if (from > data[low*2])
+      from = data[low*2];
+    if (to < data[(high - 1)*2 + 1])
+      to = data[(high - 1)*2 + 1];
+  }
+
+  if (inc_n != 0 && (OnigCodePoint )high < n) {
+    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
+    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
+    int size = (n - high) * 2 * SIZE_CODE_POINT;
+
+    if (inc_n > 0) {
+      BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
+    }
+    else {
+      BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
+    }
+  }
+
+  pos = SIZE_CODE_POINT * (1 + low * 2);
+  BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+  BBUF_WRITE_CODE_POINT(bbuf, pos, from);
+  BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
+  n += inc_n;
+
  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+
+  return 0;
+}
+/* Add [from, to] to a character class range buffer.  A reversed range is
+ * silently ignored when the syntax has ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ * and rejected with ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS otherwise.
+ */
+static int
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
+{
+  if (from > to) {
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+      return 0;
+    else
+      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+  }
+
+  return add_code_range_to_buf(pbuf, from, to);
+}
+/* Build in *pbuf the complement of `bbuf` over the code points from
+ * MBCODE_START_POS(enc) up to the maximum OnigCodePoint value.  A NULL or
+ * empty input yields SET_ALL_MULTI_BYTE_RANGE().
+ */
+static int
+not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
+{
+  int r, i, n;
+  OnigCodePoint pre, from, *data, to = 0;
+
+  *pbuf = (BBuf* )NULL;
+  if (IS_NULL(bbuf)) {
+  set_all:
+    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+  }
+
+  data = (OnigCodePoint* )(bbuf->p);
+  GET_CODE_POINT(n, data);
+  data++;
+  if (n <= 0) goto set_all;
+
+  r = 0;
+  pre = MBCODE_START_POS(enc);
+  for (i = 0; i < n; i++) {
+    from = data[i*2];
+    to   = data[i*2+1];
+    if (pre <= from - 1) {
+      r = add_code_range_to_buf(pbuf, pre, from - 1);
+      if (r != 0) return r;
+    }
+    if (to == ~((OnigCodePoint )0)) break;
+    pre = to + 1;
+  }
+  if (to < ~((OnigCodePoint )0)) {
+    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
+  }
+  return r;
+}
+/* Swap two (buffer, negation flag) pairs in place. */
+#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
+  BBuf *tbuf; \
+  int  tnot; \
+  tnot = not1;  not1  = not2;  not2  = tnot; \
+  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
+} while (0)
+/* Store in *pbuf the union of two range buffers, each optionally
+ * negated (not1 / not2).  A NULL buffer stands for the empty set.
+ */
+static int
+or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
+                  BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+  int r;
+  OnigCodePoint i, n1, *data1;
+  OnigCodePoint from, to;
+
+  *pbuf = (BBuf* )NULL;
+  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
+    if (not1 != 0 || not2 != 0)
+      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+    return 0;
+  }
+
+  r = 0;
+  if (IS_NULL(bbuf2))
+    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+  if (IS_NULL(bbuf1)) {
+    if (not1 != 0) {
+      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+    }
+    else {
+      if (not2 == 0) {
+        return bbuf_clone(pbuf, bbuf2);
+      }
+      else {
+        return not_code_range_buf(enc, bbuf2, pbuf);
+      }
+    }
+  }
+
+  if (not1 != 0)
+    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+  data1 = (OnigCodePoint* )(bbuf1->p);
+  GET_CODE_POINT(n1, data1);
+  data1++;
+
+  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
+    r = bbuf_clone(pbuf, bbuf2);
+  }
+  else if (not1 == 0) { /* 1 OR (not 2) */
+    r = not_code_range_buf(enc, bbuf2, pbuf);
+  }
+  if (r != 0) return r;
+
+  for (i = 0; i < n1; i++) {
+    from = data1[i*2];
+    to   = data1[i*2+1];
+    r = add_code_range_to_buf(pbuf, from, to);
+    if (r != 0) return r;
+  }
+  return 0;
+}
+/* Add to *pbuf the parts of [from1, to1] that are not covered by any of
+ * the `n` ranges in `data` (pairs of from/to, scanned in order).
+ */
+static int
+and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
+                OnigCodePoint* data, int n)
+{
+  int i, r;
+  OnigCodePoint from2, to2;
+
+  for (i = 0; i < n; i++) {
+    from2 = data[i*2];
+    to2   = data[i*2+1];
+    if (from2 < from1) {
+      if (to2 < from1) continue;
+      else {
+        from1 = to2 + 1;
+      }
+    }
+    else if (from2 <= to1) {
+      if (to2 < to1) {
+        if (from1 <= from2 - 1) {
+          r = add_code_range_to_buf(pbuf, from1, from2-1);
+          if (r != 0) return r;
+        }
+        from1 = to2 + 1;
+      }
+      else {
+        to1 = from2 - 1;
+      }
+    }
+    else {
+      from1 = from2;
+    }
+    if (from1 > to1) break;
+  }
+  if (from1 <= to1) {
+    r = add_code_range_to_buf(pbuf, from1, to1);
+    if (r != 0) return r;
+  }
+  return 0;
+}
+/* Store in *pbuf the intersection of two range buffers, each optionally
+ * negated.  Only "1 AND 2" and "1 AND (not 2)" are computed here; the
+ * operands are swapped first when bbuf1 is the negated one.
+ */
+static int
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+{
+  int r;
+  OnigCodePoint i, j, n1, n2, *data1, *data2;
+  OnigCodePoint from, to, from1, to1, from2, to2;
+
+  *pbuf = (BBuf* )NULL;
+  if (IS_NULL(bbuf1)) {
+    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
+      return bbuf_clone(pbuf, bbuf2);
+    return 0;
+  }
+  else if (IS_NULL(bbuf2)) {
+    if (not2 != 0)
+      return bbuf_clone(pbuf, bbuf1);
+    return 0;
+  }
+
+  if (not1 != 0)
+    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+  data1 = (OnigCodePoint* )(bbuf1->p);
+  data2 = (OnigCodePoint* )(bbuf2->p);
+  GET_CODE_POINT(n1, data1);
+  GET_CODE_POINT(n2, data2);
+  data1++;
+  data2++;
+
+  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
+    for (i = 0; i < n1; i++) {
+      from1 = data1[i*2];
+      to1   = data1[i*2+1];
+      for (j = 0; j < n2; j++) {
+        from2 = data2[j*2];
+
        to2   = data2[j*2+1];
+        if (from2 > to1) break;
+        if (to2 < from1) continue;
+        from = MAX(from1, from2);
+        to   = MIN(to1, to2);
+        r = add_code_range_to_buf(pbuf, from, to);
+        if (r != 0) return r;
+      }
+    }
+  }
+  else if (not1 == 0) { /* 1 AND (not 2) */
+    for (i = 0; i < n1; i++) {
+      from1 = data1[i*2];
+      to1   = data1[i*2+1];
+      r = and_code_range1(pbuf, from1, to1, data2, n2);
+      if (r != 0) return r;
+    }
+  }
+
+  return 0;
+}
+/* Intersect character class `cc` into `dest`.
+ *
+ * Bit sets: negated operands are first expanded into the temporaries
+ * bs1/bs2, the AND is taken, and the result is inverted back when `dest`
+ * is a negated class.  For encodings that are not single-byte the
+ * multibyte range buffers are combined as well and dest->mbuf replaced.
+ */
+static int
+and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+  int r, not1, not2;
+  BBuf *buf1, *buf2, *pbuf;
+  BitSetRef bsr1, bsr2;
+  BitSet bs1, bs2;
+
+  not1 = IS_CCLASS_NOT(dest);
+  bsr1 = dest->bs;
+  buf1 = dest->mbuf;
+  not2 = IS_CCLASS_NOT(cc);
+  bsr2 = cc->bs;
+  buf2 = cc->mbuf;
+
+  if (not1 != 0) {
+    bitset_invert_to(bsr1, bs1);
+    bsr1 = bs1;
+  }
+  if (not2 != 0) {
+    bitset_invert_to(bsr2, bs2);
+    bsr2 = bs2;
+  }
+  bitset_and(bsr1, bsr2);
+  if (bsr1 != dest->bs) {
+    bitset_copy(dest->bs, bsr1);
+    bsr1 = dest->bs;
+  }
+  if (not1 != 0) {
+    bitset_invert(dest->bs);
+  }
+
+  if (!
ONIGENC_IS_SINGLEBYTE(enc)) {
+    if (not1 != 0 && not2 != 0) {
+      /* both negated: (not a) AND (not b) == not (a OR b) */
+      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
+    }
+    else {
+      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+      if (r == 0 && not1 != 0) {
+        /* dest is stored negated, so keep the complement of the result */
+        BBuf *tbuf;
+        r = not_code_range_buf(enc, pbuf, &tbuf);
+        if (r != 0) {
+          bbuf_free(pbuf);
+          return r;
+        }
+        bbuf_free(pbuf);
+        pbuf = tbuf;
+      }
+    }
+    if (r != 0) return r;
+
+    dest->mbuf = pbuf;
+    bbuf_free(buf1);
+    return r;
+  }
+  return 0;
+}
+/* Merge character class `cc` into `dest` (union).
+ *
+ * Bit sets: negated operands are first expanded into the temporaries
+ * bs1/bs2, the OR is taken, and the result is inverted back when `dest`
+ * is a negated class.  For encodings that are not single-byte the
+ * multibyte range buffers are combined as well and dest->mbuf replaced.
+ */
+static int
+or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
+{
+  int r, not1, not2;
+  BBuf *buf1, *buf2, *pbuf;
+  BitSetRef bsr1, bsr2;
+  BitSet bs1, bs2;
+
+  not1 = IS_CCLASS_NOT(dest);
+  bsr1 = dest->bs;
+  buf1 = dest->mbuf;
+  not2 = IS_CCLASS_NOT(cc);
+  bsr2 = cc->bs;
+  buf2 = cc->mbuf;
+
+  if (not1 != 0) {
+    bitset_invert_to(bsr1, bs1);
+    bsr1 = bs1;
+  }
+  if (not2 != 0) {
+    bitset_invert_to(bsr2, bs2);
+    bsr2 = bs2;
+  }
+  bitset_or(bsr1, bsr2);
+  if (bsr1 != dest->bs) {
+    bitset_copy(dest->bs, bsr1);
+    bsr1 = dest->bs;
+  }
+  if (not1 != 0) {
+    bitset_invert(dest->bs);
+  }
+
+  if (!
ONIGENC_IS_SINGLEBYTE(enc)) {
+    if (not1 != 0 && not2 != 0) {
+      /* both negated: (not a) OR (not b) == not (a AND b) */
+      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+    }
+    else {
+      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
+      if (r == 0 && not1 != 0) {
+        /* dest is stored negated, so keep the complement of the result */
+        BBuf *tbuf;
+        r = not_code_range_buf(enc, pbuf, &tbuf);
+        if (r != 0) {
+          bbuf_free(pbuf);
+          return r;
+        }
+        bbuf_free(pbuf);
+        pbuf = tbuf;
+      }
+    }
+    if (r != 0) return r;
+
+    dest->mbuf = pbuf;
+    bbuf_free(buf1);
+    return r;
+  }
+  else
+    return 0;
+}
+/* Translate the letter following a backslash into its control character
+ * when the syntax has ONIG_SYN_OP_ESC_CONTROL_CHARS ('v' additionally
+ * needs ONIG_SYN_OP2_ESC_V_VTAB).  Any other value is returned unchanged.
+ */
+static int
+conv_backslash_value(int c, ScanEnv* env)
+{
+  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
+    switch (c) {
+    case 'n': return '\n';
+    case 't': return '\t';
+    case 'r': return '\r';
+    case 'f': return '\f';
+    case 'a': return '\007';
+    case 'b': return '\010';
+    case 'e': return '\033';
+    case 'v':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
+        return '\v';
+      break;
+
+    default:
+      break;
+    }
+  }
+  return c;
+}
+/* Return 1 when `node` may not be the target of a quantifier: an anchor,
+ * an option group wrapping such a node, or an alternation with such a
+ * branch.  Note that the N_LIST case returns 0 on every path.
+ */
+static int
+is_invalid_quantifier_target(Node* node)
+{
+  switch (NTYPE(node)) {
+  case N_ANCHOR:
+    return 1;
+    break;
+
+  case N_EFFECT:
+    if (NEFFECT(node).type == EFFECT_OPTION)
+      return is_invalid_quantifier_target(NEFFECT(node).target);
+    break;
+
+  case N_LIST: /* ex. (?:\G\A)* */
+    do {
+      if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
+    } while (IS_NOT_NULL(node = NCONS(node).right));
+    return 0;
+    break;
+
+  case N_ALT:  /* ex.
(?:abc|\A)* */
+    do {
+      if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
+    } while (IS_NOT_NULL(node = NCONS(node).right));
+    break;
+
+  default:
+    break;
+  }
+  return 0;
+}
+/* Classify a quantifier as one of the six common forms and return its
+ * index (see the mapping below), or -1 for any other bounds.
+ */
+/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
+static int
+popular_quantifier_num(QuantifierNode* qf)
+{
+  if (qf->greedy) {
+    if (qf->lower == 0) {
+      if (qf->upper == 1) return 0;
+      else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
+    }
+    else if (qf->lower == 1) {
+      if (IS_REPEAT_INFINITE(qf->upper)) return 2;
+    }
+  }
+  else {
+    if (qf->lower == 0) {
+      if (qf->upper == 1) return 3;
+      else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
+    }
+    else if (qf->lower == 1) {
+      if (IS_REPEAT_INFINITE(qf->upper)) return 5;
+    }
+  }
+  return -1;
+}
+
+/* How a quantifier applied directly to another quantifier is rewritten. */
+enum ReduceType {
+  RQ_ASIS = 0, /* as is */
+  RQ_DEL  = 1, /* delete parent */
+  RQ_A,        /* to '*'    */
+  RQ_AQ,       /* to '*?'   */
+  RQ_QQ,       /* to '??'   */
+  RQ_P_QQ,     /* to '+)??' */
+  RQ_PQ_Q      /* to '+?)?' */
+};
+/* Reduction table; both indices are popular_quantifier_num() codes and
+ * each row is labelled with the form its index stands for.
+ */
+static enum ReduceType ReduceTypeTable[6][6] = {
+  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
+  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
+  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
+  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
+  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
+  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?'
*/ +}; + +extern void +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) +{ + int pnum, cnum; + QuantifierNode *p, *c; + + p = &(NQUANTIFIER(pnode)); + c = &(NQUANTIFIER(cnode)); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); + + switch(ReduceTypeTable[cnum][pnum]) { + case RQ_DEL: + *p = *c; + break; + case RQ_A: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; + break; + case RQ_AQ: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; + break; + case RQ_QQ: + p->target = c->target; + p->lower = 0; p->upper = 1; p->greedy = 0; + break; + case RQ_P_QQ: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 0; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; + return ; + break; + case RQ_PQ_Q: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 1; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; + return ; + break; + case RQ_ASIS: + p->target = cnode; + return ; + break; + } + + c->target = NULL_NODE; + onig_node_free(cnode); +} + + +enum TokenSyms { + TK_EOT = 0, /* end of token */ + TK_RAW_BYTE = 1, + TK_CHAR, + TK_STRING, + TK_CODE_POINT, + TK_ANYCHAR, + TK_CHAR_TYPE, + TK_BACKREF, + TK_CALL, + TK_ANCHOR, + TK_OP_REPEAT, + TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ + TK_ALT, + TK_SUBEXP_OPEN, + TK_SUBEXP_CLOSE, + TK_CC_OPEN, + TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + TK_CC_CLOSE, + TK_CC_RANGE, + TK_POSIX_BRACKET_OPEN, + TK_CC_AND, /* && */ + TK_CC_CC_OPEN /* [ */ +}; + +typedef struct { + enum TokenSyms type; + int escaped; + int base; /* is number: 8, 16 (used in [....]) */ + UChar* backp; + union { + UChar* s; + int c; + OnigCodePoint code; + int anchor; + int subtype; + struct { + int lower; + int upper; + int greedy; + int possessive; + } repeat; + struct { + int num; + int ref1; + int* refs; + int by_name; +#ifdef USE_BACKREF_AT_LEVEL + int exist_level; + int level; /* \k<name+n> 
*/ +#endif + } backref; + struct { + UChar* name; + UChar* name_end; + } call; + struct { + int not; + } prop; + } u; +} OnigToken; + + +static int +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +{ + int low, up, syn_allow, non_low = 0; + int r = 0; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); + + if (PEND) { + if (syn_allow) + return 1; /* "....{" : OK! */ + else + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + } + + if (! syn_allow) { + c = PPEEK; + if (c == ')' || c == '(' || c == '|') { + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; + } + } + + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } + + if (PEND) goto invalid; + PFETCH(c); + if (c == ',') { + UChar* prev = p; + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } + else { + if (non_low != 0) + goto invalid; + + PUNFETCH; + up = low; /* {n} : exact n times */ + r = 2; /* fixed */ + } + + if (PEND) goto invalid; + PFETCH(c); + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC(enc)) goto invalid; + PFETCH(c); + } + if (c != '}') goto invalid; + + if (!IS_REPEAT_INFINITE(up) && low > up) { + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + } + + tok->type = TK_INTERVAL; + tok->u.repeat.lower = low; + 
tok->u.repeat.upper = up; + *src = p; + return r; /* 0: normal {n,m}, 2: fixed {n} */ + + invalid: + if (syn_allow) + return 1; /* OK */ + else + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; +} + +/* \M-, \C-, \c, or \... */ +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) +{ + int v; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + switch (c) { + case 'M': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c = ((c & 0xff) | 0x80); + } + else + goto backslash; + break; + + case 'C': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; + goto control; + } + else + goto backslash; + + case 'c': + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { + control: + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c == '?') { + c = 0177; + } + else { + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c &= 0x9f; + } + break; + } + /* fall through */ + + default: + { + backslash: + c = conv_backslash_value(c, env); + } + break; + } + + *src = p; + return c; +} + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); + +#ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_AT_LEVEL +/* + \k<name+n>, \k<name-n> +*/ +static int +fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end + , ScanEnv* env, int* level) +{ + int r, exist_level = 0; + OnigCodePoint c = 0; + OnigCodePoint 
first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')' || c == '+' || c == '-') break; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (c != '>') { + if (c == '+' || c == '-') { + int num; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + *level = (num * flag); + exist_level = 1; + + PFETCH(c); + if (c == '>') + goto first_check; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + first_check: + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return (exist_level ? 
1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_AT_LEVEL */ + +/* + def: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ +static int +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +{ + int r, is_num; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + is_num = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')') break; + + if (is_num == 1) { + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#else +static int +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +{ + int r, len; + OnigCodePoint c = 0; + UChar *name_end; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + r = 0; + while (!PEND) { + name_end = p; + if (enc_len(enc, p) > 1) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + + PFETCH(c); + if (c == '>' || c 
== ')') break; + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif + +static void +CC_ESC_WARN(ScanEnv* env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static void +CCEND_ESC_WARN(ScanEnv* env, UChar* c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, + (env)->pattern, (env)->pattern_end, + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static UChar* +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) +{ + int i; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + while (p < to) { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enc_len(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enc_len(enc, q); + } + if (i >= n) { + if (IS_NOT_NULL(next)) + *next = q; + return p; + } + } + p = q; + } + return NULL_UCHARP; +} + +static int +str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc) +{ + int i, in_esc; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + in_esc = 0; + while (p < to) { + if (in_esc) { + in_esc = 0; + p += 
enc_len(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enc_len(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enc_len(enc, q); + } + if (i >= n) return 1; + p += enc_len(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(enc)) in_esc = 1; + p = q; + } + } + } + return 0; +} + +static int +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int num; + OnigCodePoint c, c2; + OnigSyntaxType* syn = env->syntax; + OnigEncoding enc = env->enc; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + PFETCH(c); + tok->type = TK_CHAR; + tok->base = 0; + tok->u.c = c; + tok->escaped = 0; + + if (c == ']') { + tok->type = TK_CC_CLOSE; + } + else if (c == '-') { + tok->type = TK_CC_RANGE; + } + else if (c == MC_ESC(enc)) { + if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) + goto end; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + tok->escaped = 1; + tok->u.c = c; + switch (c) { + case 'w': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WORD; + break; + case 'W': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WORD; + break; + case 'd': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_DIGIT; + break; + case 'D': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_DIGIT; + break; + case 's': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WHITE_SPACE; + break; + case 'S': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + break; + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_XDIGIT; + break; + case 'H': + if (! 
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_XDIGIT; + break; + + case 'p': + case 'P': + c2 = PPEEK; + if (c2 == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '0': + case '1': case '2': case '3': case '4': case '5': case '6': case '7': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + if (tok->u.c != num) { + tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + } + break; + } + } + else if (c == '[') { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; + tok->backp = p; /* point at '[' is readed */ + PINC; + if (str_exist_check_with_esc(send, 2, p, end, + (OnigCodePoint )']', enc)) { + tok->type = TK_POSIX_BRACKET_OPEN; + } + else { + PUNFETCH; + goto cc_in_cc; + } + } + else { + cc_in_cc: + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { + tok->type = TK_CC_CC_OPEN; + } + else { + CC_ESC_WARN(env, (UChar* )"["); + } + } + } + else if (c == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && (PPEEK_IS('&'))) { + PINC; + tok->type = TK_CC_AND; + } + } + + end: + *src = p; + return tok->type; +} + +static int +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int r, num; + OnigCodePoint c; + OnigEncoding enc = env->enc; + OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + start: + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + tok->type = TK_STRING; + tok->base = 0; + tok->backp = p; + + PFETCH(c); + if (IS_MC_ESC_CODE(c, enc, syn)) { + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + 
tok->backp = p; + PFETCH(c); + + tok->u.c = c; + tok->escaped = 1; + switch (c) { + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + greedy_check: + if (!PEND && PPEEK_IS('?') && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; + } + else { + possessive_check: + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } + } + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case 'w': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WORD; + break; + + case 'W': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WORD; + break; + + case 'b': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BOUND; + break; + + case 'B': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + break; + +#ifdef USE_WORD_BEGIN_END + case '<': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BEGIN; + break; + + case '>': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_END; + break; +#endif + + case 's': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WHITE_SPACE; + break; + + case 'S': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + break; + + case 'd': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_DIGIT; + break; + + case 'D': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_DIGIT; + break; + + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_XDIGIT; + break; + + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_XDIGIT; + break; + + case 'A': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + begin_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_BUF; + break; + + case 'Z': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_SEMI_END_BUF; + break; + + case 'z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + end_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_END_BUF; + break; + + case 'G': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_POSITION; + break; + + case '`': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto begin_buf; + break; + + case '\'': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto end_buf; + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + PUNFETCH; + prev = p; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { + goto skip_backref; + } + + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level = 0; +#endif + break; + } + + skip_backref: + if (c == '8' || c == '9') { + /* normal char */ + p = prev; PINC; + break; + } + + p = prev; + /* fall through */ + case '0': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + else if (c != '0') { + PINC; + } + break; + +#ifdef USE_NAMED_GROUP + case 'k': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { + PFETCH(c); + if (c == '<') { + UChar* name_end; + int* backs; + + prev = p; + +#ifdef USE_BACKREF_AT_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ + r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else + r = fetch_name(&p, end, &name_end, env, 1); +#endif + if (r < 0) return r; + + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + else + PUNFETCH; + } + break; +#endif + +#ifdef USE_SUBEXP_CALL + case 'g': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { + PFETCH(c); + if (c == '<') { + UChar* name_end; + + prev = p; + r = fetch_name(&p, end, &name_end, env, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + } + else + PUNFETCH; + } + break; +#endif + + case 'Q': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { + tok->type = TK_QUOTE_OPEN; + } + break; + + case 'p': + case 'P': + if (PPEEK_IS('{') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + /* set_raw: */ + if (tok->u.c != num) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { /* string */ + p = tok->backp + enc_len(enc, tok->backp); + } + break; + } + } + else { + tok->u.c = c; + tok->escaped = 0; + +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR(enc)) + goto any_char; + else if (c == MC_ANYTIME(enc)) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME(enc)) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME(enc)) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME(enc)) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + + switch (c) { + case '.': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS + any_char: +#endif + tok->type = TK_ANYCHAR; + break; + + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + anytime: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + one_or_more_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS + zero_or_one_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + goto greedy_check; + break; + + case '{': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(enc)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } + PUNFETCH; + } + + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case '^': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + break; + + case '$': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + break; + + case '[': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; + tok->type = TK_CC_OPEN; + break; + + case ']': + if (*src > env->pattern) /* /].../ is allowed. 
*/ + CCEND_ESC_WARN(env, (UChar* )"]"); + break; + + case '#': + if (IS_EXTEND(env->option)) { + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; + } + break; + + case ' ': case '\t': case '\n': case '\r': case '\f': + if (IS_EXTEND(env->option)) + goto start; + break; + + default: + /* string */ + break; + } + } + +#ifdef USE_VARIABLE_META_CHARS + out: +#endif + *src = p; + return tok->type; +} + +static int +add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) +{ + int i, r; + OnigCodePoint j; + + int nsb = ONIGENC_CODE_RANGE_NUM(sbr); + int nmb = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < nsb; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + + for (i = 0; i < nmb; i++) { + r = add_code_range_to_buf(&(cc->mbuf), + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + } + } + else { + OnigCodePoint prev = 0; + + if (ONIGENC_MBC_MINLEN(enc) == 1) { + for (i = 0; i < nsb; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1; + } + if (prev < 0x7f) { + for (j = prev; j < 0x7f; j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + + prev = 0x80; + } + + for (i = 0; i < nmb; i++) { + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); + if (r != 0) return r; + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + if (r != 0) return r; + } + } + + return 0; +} + +static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + const OnigCodePoint *sbr, *mbr; + OnigEncoding enc = env->enc; + + r = 
ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); + if (r == 0) { + return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr); + } + else if (r != ONIG_NO_SUPPORT_CONFIG) { + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */ + && ! 
ONIGENC_IS_CODE_WORD(enc, c)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +parse_ctype_to_enc_ctype(int pctype, int* not) +{ + int ctype; + + switch (pctype) { + case CTYPE_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 0; + break; + case CTYPE_NOT_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 1; + break; + case CTYPE_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 0; + break; + case CTYPE_NOT_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 1; + break; + case CTYPE_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 0; + break; + case CTYPE_NOT_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 1; + break; + case CTYPE_XDIGIT: + ctype = ONIGENC_CTYPE_XDIGIT; + *not = 0; + break; + case CTYPE_NOT_XDIGIT: + ctype = ONIGENC_CTYPE_XDIGIT; + *not = 1; + break; + default: + return ONIGERR_PARSER_BUG; + break; + } + return ctype; +} + +typedef struct { + UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + +static int +parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +{ +#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 +#define POSIX_BRACKET_NAME_MAX_LEN 6 + + static PosixBracketEntryType PBS[] = { + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int not, i, r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar 
*p = *src; + PFETCH_READY; + + if (PPEEK_IS('^')) { + PINC; + not = 1; + } + else + not = 0; + + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2) + goto not_posix_bracket; + + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { + p = (UChar* )onigenc_step(enc, p, end, pb->len); + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; + + PINC; PINC; + *src = p; + return 0; + } + } + + not_posix_bracket: + c = 0; + i = 0; + while (!PEND && ((c = PPEEK) != ':') && c != ']') { + PINC; + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + if (c == ':' && ! PEND) { + PINC; + if (! PEND) { + PFETCH(c); + if (c == ']') + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + } + } + + return 1; /* 1: is not POSIX bracket, but no error. */ +} + +static int +property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) +{ + static PosixBracketEntryType PBS[] = { + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int len; + + len = onigenc_strlen(enc, p, end); + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + return -1; +} + 
+static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int ctype; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *prev, *start, *p = *src; + PFETCH_READY; + + /* 'IsXXXX' => 'XXXX' */ + if (!PEND && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) { + c = PPEEK; + if (c == 'I') { + PINC; + if (! PEND) { + c = PPEEK; + if (c == 's') + PINC; + else + PUNFETCH; + } + } + } + + start = prev = p; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + ctype = property_name_to_ctype(start, prev, enc); + if (ctype < 0) break; + + *src = p; + return ctype; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') + break; + } + + onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME, + *src, prev); + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + + return 0; +} + + +enum CCSTATE { + CCS_VALUE, + CCS_RANGE, + CCS_COMPLETE, + CCS_START +}; + +enum CCVALTYPE { + CCV_SB, + CCV_CODE_POINT, + CCV_CLASS +}; + +static int +next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + if (*state == CCS_RANGE) + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + + if (*state == CCS_VALUE && *type != CCV_CLASS) { + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + } + + *state = CCS_VALUE; + *type = CCV_CLASS; + return 0; +} + +static int +next_state_val(CClassNode* cc, 
OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + switch (*state) { + case CCS_VALUE: + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + break; + + case CCS_RANGE: + if (intype == *type) { + if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )v); + } + else { + r = add_code_range(&(cc->mbuf), env, *vs, v); + if (r < 0) return r; + } + } + else { +#if 0 + if (intype == CCV_CODE_POINT && *type == CCV_SB) { +#endif + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); + if (r < 0) return r; +#if 0 + } + else + return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; +#endif + } + ccs_range_end: + *state = CCS_COMPLETE; + break; + + case CCS_COMPLETE: + case CCS_START: + *state = CCS_VALUE; + break; + + default: + break; + } + + *vs_israw = v_israw; + *vs = v; + *type = intype; + return 0; +} + +static int +code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, + OnigEncoding enc) +{ + int in_esc; + OnigCodePoint code; + UChar* p = from; + PFETCH_READY; + + in_esc = 0; + while (! 
PEND) { + if (ignore_escaped && in_esc) { + in_esc = 0; + } + else { + PFETCH(code); + if (code == c) return 1; + if (code == MC_ESC(enc)) in_esc = 1; + } + } + return 0; +} + +static int +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, neg, len, fetched, and_start; + OnigCodePoint v, vs; + UChar *p; + Node* node; + CClassNode *cc, *prev_cc; + CClassNode work_cc; + + enum CCSTATE state; + enum CCVALTYPE val_type, in_type; + int val_israw, in_israw; + + prev_cc = (CClassNode* )NULL; + *np = NULL_NODE; + r = fetch_token_in_cc(tok, src, end, env); + if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { + neg = 1; + r = fetch_token_in_cc(tok, src, end, env); + } + else { + neg = 0; + } + + if (r < 0) return r; + if (r == TK_CC_CLOSE) { + if (! code_exist_check((OnigCodePoint )']', + *src, env->pattern_end, 1, env->enc)) + return ONIGERR_EMPTY_CHAR_CLASS; + + CC_ESC_WARN(env, (UChar* )"]"); + r = tok->type = TK_CHAR; /* allow []...] */ + } + + *np = node = node_new_cclass(); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + cc = &(NCCLASS(node)); + + and_start = 0; + state = CCS_START; + p = *src; + while (r != TK_CC_CLOSE) { + fetched = 0; + switch (r) { + case TK_CHAR: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); + if (len > 1) { + in_type = CCV_CODE_POINT; + } + else { + sb_char: + in_type = CCV_SB; + } + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry2; + break; + + case TK_RAW_BYTE: + /* tok->base != 0 : octal or hexadec. */ + if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enc_len(env->enc, buf); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + r = fetch_token_in_cc(tok, &p, end, env); + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } + } + else { + v = (OnigCodePoint )tok->u.c; + raw_single: + in_type = CCV_SB; + } + in_israw = 1; + goto val_entry2; + break; + + case TK_CODE_POINT: + v = tok->u.code; + in_israw = 1; + val_entry: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: + r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + &state, env); + if (r != 0) goto err; + break; + + case TK_POSIX_BRACKET_OPEN: + r = parse_posix_bracket(cc, &p, end, env); + if (r < 0) goto err; + if (r == 1) { /* is not POSIX bracket */ + CC_ESC_WARN(env, (UChar* )"["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; + } + goto next_class; + break; + + case TK_CHAR_TYPE: + { + int ctype, not; + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + r = add_ctype_to_cc(cc, ctype, not, env); + if (r != 0) return r; + } + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); + if (r != 0) goto err; + break; + + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + + case TK_CC_RANGE: + if (state == CCS_VALUE) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + state = CCS_RANGE; + } + else if (state == CCS_START) { + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, (UChar* )"-"); + + goto val_entry; + } + else if (state == CCS_RANGE) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [!--x] is allowed */ + } + else { /* CCS_COMPLETE */ + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + break; + + case TK_CC_CC_OPEN: /* [ */ + { + Node *anode; + CClassNode* acc; + + r = parse_char_class(&anode, tok, &p, end, env); + if (r != 0) goto cc_open_err; + acc = &(NCCLASS(anode)); + r = or_cclass(cc, acc, env->enc); + + onig_node_free(anode); + cc_open_err: + if (r != 0) goto err; + } + break; + + case TK_CC_AND: /* && */ + { + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + } + else { + prev_cc = cc; + cc = &work_cc; + } + initialize_cclass(cc); + } + break; + + case TK_EOT: + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; + goto err; + break; + default: + r = ONIGERR_PARSER_BUG; + goto err; + break; + } + + if (fetched) + r = tok->type; + else { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + } + } + + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + 
cc = prev_cc; + } + + if (neg != 0) + CCLASS_SET_NOT(cc); + else + CCLASS_CLEAR_NOT(cc); + if (IS_CCLASS_NOT(cc) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { + int is_empty; + + is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + if (is_empty != 0) + BITSET_IS_EMPTY(cc->bs, is_empty); + + if (is_empty == 0) { +#define NEWLINE_CODE 0x0a + + if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { + if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) + BITSET_SET_BIT(cc->bs, NEWLINE_CODE); + else + add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); + } + } + } + *src = p; + return 0; + + err: + if (cc != &(NCCLASS(*np))) + bbuf_free(cc->mbuf); + onig_node_free(*np); + return r; +} + +static int parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env); + +static int +parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, num; + int list_capture; + Node *target; + OnigOptionType option; + OnigEncoding enc = env->enc; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + *np = NULL; + if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + + option = env->option; + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH(c); + switch (c) { + case ':': /* (?:...) grouping only */ + group: + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(np, tok, term, &p, end, env); + if (r < 0) return r; + *src = p; + return 1; /* group */ + break; + + case '=': + *np = onig_node_new_anchor(ANCHOR_PREC_READ); + break; + case '!': /* preceding read */ + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); + break; + case '>': /* (?>...) stop backtrack */ + *np = node_new_effect(EFFECT_STOP_BACKTRACK); + break; + + case '<': /* look behind (?<=...), (?<!...) 
*/ + PFETCH(c); + if (c == '=') + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); + else if (c == '!') + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); +#ifdef USE_NAMED_GROUP + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; + + PUNFETCH; + list_capture = 0; + + named_group: + name = p; + r = fetch_name(&p, end, &name_end, env, 0); + if (r < 0) return r; + + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_effect_memory(env->option, 1); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } +#endif + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case '@': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { +#ifdef USE_NAMED_GROUP + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + if (c == '<') { + list_capture = 1; + goto named_group; /* (?@<name>...) 
*/ + } + PUNFETCH; + } +#endif + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + num = scan_env_add_mem_entry(env); + if (num < 0) { + onig_node_free(*np); + return num; + } + else if (num >= BIT_STATUS_BITS_NUM) { + onig_node_free(*np); + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + } + NEFFECT(*np).regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + +#ifdef USE_POSIXLINE_OPTION + case 'p': +#endif + case '-': case 'i': case 'm': case 's': case 'x': + { + int neg = 0; + + while (1) { + switch (c) { + case ':': + case ')': + break; + + case '-': neg = 1; break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 's': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'm': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#ifdef USE_POSIXLINE_OPTION + case 'p': + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + break; +#endif + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + + if (c == ')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *src = p; + return 2; /* option only */ + } + else if (c == ':') { + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).target = target; + *src = p; + return 0; + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + } + } + break; + + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } + else { + if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) + goto group; + + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + NEFFECT(*np).regnum = num; + } + + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + if (r < 0) return r; + + if (NTYPE(*np) == N_ANCHOR) + NANCHOR(*np).target = target; + else { + NEFFECT(*np).target = target; + if (NEFFECT(*np).type == EFFECT_MEMORY) { + /* Don't move this to previous of parse_subexp() */ + r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); + if (r != 0) return r; + } + } + + *src = p; + return 0; +} + +static const char* PopularQStr[] = { + "?", "*", "+", "??", "*?", "+?" +}; + +static const char* ReduceQStr[] = { + "", "", "*", "*?", "??", "+ and ??", "+? 
and ?" +}; + +static int +set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) +{ + QuantifierNode* qn; + + qn = &(NQUANTIFIER(qnode)); + if (qn->lower == 1 && qn->upper == 1) { + return 1; + } + + switch (NTYPE(target)) { + case N_STRING: + if (! group) { + StrNode* sn = &(NSTRING(target)); + if (str_node_can_be_split(sn, env->enc)) { + Node* n = str_node_split_last_char(sn, env->enc); + if (IS_NOT_NULL(n)) { + qn->target = n; + return 2; + } + } + } + break; + + case N_QUANTIFIER: + { /* check redundant double repeat. */ + /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ + QuantifierNode* qnt = &(NQUANTIFIER(target)); + int nestq_num = popular_quantifier_num(qn); + int targetq_num = popular_quantifier_num(qnt); + +#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR + if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { + UChar buf[WARN_BUFSIZE]; + + switch(ReduceTypeTable[targetq_num][nestq_num]) { + case RQ_ASIS: + break; + + case RQ_DEL: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + + default: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", + PopularQStr[targetq_num], PopularQStr[nestq_num], + ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + } + } + + warn_exit: +#endif + if (targetq_num >= 0) { + if (nestq_num >= 0) { + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (! 
IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } + } + } + break; + + default: + break; + } + + qn->target = target; + q_exit: + return 0; +} + +#ifdef USE_SHARED_CCLASS_TABLE + +#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 + +/* for ctype node hash table */ + +typedef struct { + OnigEncoding enc; + int not; + int type; +} type_cclass_key; + +static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) +{ + if (x->type != y->type) return 1; + if (x->enc != y->enc) return 1; + if (x->not != y->not) return 1; + return 0; +} + +static int type_cclass_hash(type_cclass_key* key) +{ + int i, val; + unsigned char *p; + + val = 0; + + p = (unsigned char* )&(key->enc); + for (i = 0; i < sizeof(key->enc); i++) { + val = val * 997 + (int )*p++; + } + + p = (unsigned char* )(&key->type); + for (i = 0; i < sizeof(key->type); i++) { + val = val * 997 + (int )*p++; + } + + val += key->not; + return val + (val >> 5); +} + +static struct st_hash_type type_type_cclass_hash = { + type_cclass_cmp, + type_cclass_hash, +}; + +static st_table* OnigTypeCClassTable; + + +static int +i_free_shared_class(type_cclass_key* key, Node* node, void* arg) +{ + if (IS_NOT_NULL(node)) { + CClassNode* cc = &(NCCLASS(node)); + if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); + xfree(node); + } + + if (IS_NOT_NULL(key)) xfree(key); + return ST_DELETE; +} + +extern int +onig_free_shared_cclass_table(void) +{ + if (IS_NOT_NULL(OnigTypeCClassTable)) { + onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + onig_st_free_table(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; + } + + return 0; +} + +#endif /* USE_SHARED_CCLASS_TABLE */ + + +static int +parse_exp(Node** np, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r, len, group = 0; + Node* qn; + Node** targetp; + + *np = NULL; + if (tok->type == term) + goto end_of_token; + + switch (tok->type) { + case TK_ALT: + case TK_EOT: + 
end_of_token: + *np = node_new_empty(); + return tok->type; + break; + + case TK_SUBEXP_OPEN: + r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env); + if (r < 0) return r; + if (r == 1) group = 1; + else if (r == 2) { /* option only */ + Node* target; + OnigOptionType prev = env->option; + + env->option = NEFFECT(*np).option; + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, src, end, env); + env->option = prev; + if (r < 0) return r; + NEFFECT(*np).target = target; + return tok->type; + } + break; + + case TK_SUBEXP_CLOSE: + if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) + return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; + + if (tok->escaped) goto tk_raw_byte; + else goto tk_byte; + break; + + case TK_STRING: + tk_byte: + { + *np = node_new_str(tok->backp, *src); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_STRING) break; + + r = onig_node_str_cat(*np, tok->backp, *src); + if (r < 0) return r; + } + + string_end: + targetp = np; + goto repeat; + } + break; + + case TK_RAW_BYTE: + tk_raw_byte: + { + *np = node_new_str_char((UChar )tok->u.c); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + len = 1; + while (1) { + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enc_len(env->enc, NSTRING(*np).s)) { + r = fetch_token(tok, src, end, env); + goto string_end; + } + } + + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0); + if (len + rem == enc_len(env->enc, NSTRING(*np).s)) { + goto string_end; + } + } +#endif + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + } + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; + + len++; + } + } + break; + + case 
TK_CODE_POINT: + { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + if (num < 0) return num; +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + *np = node_new_str_raw(buf, buf + num); +#else + *np = node_new_str(buf, buf + num); +#endif + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + } + break; + + case TK_QUOTE_OPEN: + { + OnigCodePoint end_op[2]; + UChar *qstart, *qend, *nextp; + + end_op[0] = (OnigCodePoint )MC_ESC(env->enc); + end_op[1] = (OnigCodePoint )'E'; + qstart = *src; + qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); + if (IS_NULL(qend)) { + nextp = qend = end; + } + *np = node_new_str(qstart, qend); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *src = nextp; + } + break; + + case TK_CHAR_TYPE: + { + switch (tok->u.subtype) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *np = node_new_ctype(tok->u.subtype); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + + case CTYPE_WHITE_SPACE: + case CTYPE_NOT_WHITE_SPACE: + case CTYPE_DIGIT: + case CTYPE_NOT_DIGIT: + case CTYPE_XDIGIT: + case CTYPE_NOT_XDIGIT: + { + CClassNode* cc; + int ctype, not; + +#ifdef USE_SHARED_CCLASS_TABLE + const OnigCodePoint *sbr, *mbr; + + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); + if (r == 0 && + ONIGENC_CODE_RANGE_NUM(mbr) + >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { + type_cclass_key key; + type_cclass_key* new_key; + + key.enc = env->enc; + key.not = not; + key.type = ctype; + + THREAD_ATOMIC_START; + + if (IS_NULL(OnigTypeCClassTable)) { + OnigTypeCClassTable + = onig_st_init_table_with_size(&type_type_cclass_hash, 10); + if (IS_NULL(OnigTypeCClassTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + } + else { + if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, + (st_data_t* )np)) { + THREAD_ATOMIC_END; + break; + } + } + + *np = node_new_cclass_by_codepoint_range(not, sbr, mbr); + if (IS_NULL(*np)) { + 
THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + CCLASS_SET_SHARE(&(NCCLASS(*np))); + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + xmemcpy(new_key, &key, sizeof(type_cclass_key)); + onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, + (st_data_t )*np); + + THREAD_ATOMIC_END; + } + else { +#endif + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + add_ctype_to_cc(cc, ctype, 0, env); + if (not != 0) CCLASS_SET_NOT(cc); +#ifdef USE_SHARED_CCLASS_TABLE + } +#endif + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + } + break; + + case TK_CHAR_PROPERTY: + r = parse_char_property(np, tok, src, end, env); + if (r != 0) return r; + break; + + case TK_CC_OPEN: + { + CClassNode* cc; + + r = parse_char_class(np, tok, src, end, env); + if (r != 0) return r; + + cc = &(NCCLASS(*np)); + + if (IS_IGNORECASE(env->option)) { + int i, n, in_cc; + const OnigPairAmbigCodes* ccs; + BitSetRef bs = cc->bs; + OnigAmbigType amb; + + for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { + if ((amb & env->ambig_flag) == 0) continue; + + n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs); + for (i = 0; i < n; i++) { + in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc); + + if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) || + (in_cc == 0 && IS_CCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || + ccs[i].from >= SINGLE_BYTE_SIZE) { + /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */ + add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to); + } + else { + if (BITSET_AT(bs, ccs[i].from)) { + /* /(?i:[^A-C])/.match("a") ==> fail. 
*/ + BITSET_SET_BIT(bs, ccs[i].to); + } + if (BITSET_AT(bs, ccs[i].to)) { + BITSET_SET_BIT(bs, ccs[i].from); + } + } + } + } + } + } + } + break; + + case TK_ANYCHAR: + *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + + case TK_ANYCHAR_ANYTIME: + *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + qn = node_new_quantifier(0, REPEAT_INFINITE, 0); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUANTIFIER(qn).target = *np; + *np = qn; + break; + + case TK_BACKREF: + len = tok->u.backref.num; + *np = node_new_backref(len, + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + +#ifdef USE_SUBEXP_CALL + case TK_CALL: + *np = node_new_call(tok->u.call.name, tok->u.call.name_end); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + env->num_call++; + break; +#endif + + case TK_ANCHOR: + *np = onig_node_new_anchor(tok->u.anchor); + break; + + case TK_OP_REPEAT: + case TK_INTERVAL: + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + else + *np = node_new_empty(); + } + else { + goto tk_byte; + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + { + targetp = np; + + re_entry: + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + + repeat: + if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + if (is_invalid_quantifier_target(*targetp)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, + (r == TK_INTERVAL ? 
1 : 0)); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUANTIFIER(qn).greedy = tok->u.repeat.greedy; + r = set_quantifier(qn, *targetp, group, env); + if (r < 0) return r; + + if (tok->u.repeat.possessive != 0) { + Node* en; + en = node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); + NEFFECT(en).target = qn; + qn = en; + } + + if (r == 0) { + *targetp = qn; + } + else if (r == 2) { /* split case: /abc+/ */ + Node *tmp; + + *targetp = node_new_list(*targetp, NULL); + CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY); + tmp = NCONS(*targetp).right = node_new_list(qn, NULL); + CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY); + targetp = &(NCONS(tmp).left); + } + goto re_entry; + } + } + + return r; +} + +static int +parse_branch(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) return r; + + if (r == TK_EOT || r == term || r == TK_ALT) { + *top = node; + } + else { + *top = node_new_list(node, NULL); + headp = &(NCONS(*top).right); + while (r != TK_EOT && r != term && r != TK_ALT) { + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) return r; + + if (NTYPE(node) == N_LIST) { + *headp = node; + while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right; + headp = &(NCONS(node).right); + } + else { + *headp = node_new_list(node, NULL); + headp = &(NCONS(*headp).right); + } + } + } + + return r; +} + +/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ +static int +parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + if (r == term) { + *top = node; + } + else if (r == TK_ALT) { + *top = node_new_alt(node, NULL); + headp = &(NCONS(*top).right); + while (r == TK_ALT) { + r = fetch_token(tok, src, 
end, env); + if (r < 0) return r; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) return r; + + *headp = node_new_alt(node, NULL); + headp = &(NCONS(*headp).right); + } + + if (tok->type != term) + goto err; + } + else { + err: + if (term == TK_SUBEXP_CLOSE) + return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + else + return ONIGERR_PARSER_BUG; + } + + return r; +} + +static int +parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigToken tok; + + r = fetch_token(&tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(top, &tok, TK_EOT, src, end, env); + if (r < 0) return r; + return 0; +} + +extern int +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, + ScanEnv* env) +{ + int r; + UChar* p; + +#ifdef USE_NAMED_GROUP + names_clear(reg); +#endif + + scan_env_clear(env); + env->option = reg->options; + env->ambig_flag = reg->ambig_flag; + env->enc = reg->enc; + env->syntax = reg->syntax; + env->pattern = (UChar* )pattern; + env->pattern_end = (UChar* )end; + env->reg = reg; + + *root = NULL; + p = (UChar* )pattern; + r = parse_regexp(root, &p, (UChar* )end, env); + reg->num_mem = env->num_mem; + return r; +} + +extern void +onig_scan_env_set_error_string(ScanEnv* env, int ecode, + UChar* arg, UChar* arg_end) +{ + env->error = arg; + env->error_end = arg_end; +} diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h new file mode 100644 index 0000000..b25618a --- /dev/null +++ b/ext/mbstring/oniguruma/regparse.h @@ -0,0 +1,328 @@ +#ifndef REGPARSE_H +#define REGPARSE_H +/********************************************************************** + regparse.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +/* node type */ +#define N_STRING (1<< 0) +#define N_CCLASS (1<< 1) +#define N_CTYPE (1<< 2) +#define N_ANYCHAR (1<< 3) +#define N_BACKREF (1<< 4) +#define N_QUANTIFIER (1<< 5) +#define N_EFFECT (1<< 6) +#define N_ANCHOR (1<< 7) +#define N_LIST (1<< 8) +#define N_ALT (1<< 9) +#define N_CALL (1<<10) + +#define IS_NODE_TYPE_SIMPLE(type) \ + (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0) + +#define NTYPE(node) ((node)->type) +#define NCONS(node) ((node)->u.cons) +#define NSTRING(node) ((node)->u.str) +#define NCCLASS(node) ((node)->u.cclass) +#define NCTYPE(node) ((node)->u.ctype) +#define NQUANTIFIER(node) ((node)->u.quantifier) +#define NANCHOR(node) ((node)->u.anchor) +#define NBACKREF(node) ((node)->u.backref) +#define NEFFECT(node) ((node)->u.effect) +#define NCALL(node) ((node)->u.call) + +#define CTYPE_WORD (1<<0) +#define CTYPE_NOT_WORD (1<<1) +#define CTYPE_WHITE_SPACE (1<<2) +#define CTYPE_NOT_WHITE_SPACE (1<<3) +#define CTYPE_DIGIT (1<<4) +#define CTYPE_NOT_DIGIT (1<<5) +#define CTYPE_XDIGIT (1<<6) +#define CTYPE_NOT_XDIGIT (1<<7) + +#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) + +#define EFFECT_MEMORY (1<<0) +#define EFFECT_OPTION (1<<1) +#define EFFECT_STOP_BACKTRACK (1<<2) + +#define NODE_STR_MARGIN 16 +#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE 6 + +#define NSTR_RAW (1<<0) /* by backslashed number */ +#define NSTR_AMBIG (1<<1) +#define NSTR_AMBIG_REDUCE (1<<2) + +#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW +#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW +#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG +#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE +#define NSTRING_IS_RAW(node) 
(((node)->u.str.flag & NSTR_RAW) != 0) +#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) +#define NSTRING_IS_AMBIG_REDUCE(node) \ + (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0) + +#define BACKREFS_P(br) \ + (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); + +#define NQ_TARGET_ISNOT_EMPTY 0 +#define NQ_TARGET_IS_EMPTY 1 +#define NQ_TARGET_IS_EMPTY_MEM 2 +#define NQ_TARGET_IS_EMPTY_REC 3 + + +typedef struct { + UChar* s; + UChar* end; + unsigned int flag; + int capa; /* (allocated size - 1) or 0: use buf[] */ + UChar buf[NODE_STR_BUF_SIZE]; +} StrNode; + +/* move to regint.h */ +#if 0 +typedef struct { + int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; +#endif + +typedef struct { + int state; + struct _Node* target; + int lower; + int upper; + int greedy; + int target_empty_info; + struct _Node* head_exact; + struct _Node* next_head_exact; + int is_refered; /* include called node. don't eliminate even if {0} */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QuantifierNode; + +/* status bits */ +#define NST_MIN_FIXED (1<<0) +#define NST_MAX_FIXED (1<<1) +#define NST_CLEN_FIXED (1<<2) +#define NST_MARK1 (1<<3) +#define NST_MARK2 (1<<4) +#define NST_MEM_BACKREFED (1<<5) +#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) +#define NST_RECURSION (1<<7) +#define NST_CALLED (1<<8) +#define NST_ADDR_FIXED (1<<9) +#define NST_NAMED_GROUP (1<<10) +#define NST_NAME_REF (1<<11) +#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ +#define NST_NEST_LEVEL (1<<13) +#define NST_BY_NUMBER (1<<14) /* {n,m} */ + +#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) +#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) + +#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0) +#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) +#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0) +#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0) +#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0) +#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) +#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) +#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) +#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \ + (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) +#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) + +#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION +#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) +#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) + +typedef struct { + int state; + int type; + int regnum; + OnigOptionType option; + struct _Node* target; + AbsAddrType call_addr; + /* for multiple call reference */ + OnigDistance min_len; /* min length (byte) */ + OnigDistance max_len; /* max length (byte) */ + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ +} EffectNode; + +#define CALLNODE_REFNUM_UNDEF -1 + +#ifdef USE_SUBEXP_CALL + +typedef struct { + int offset; + struct _Node* target; +} UnsetAddr; + +typedef struct { + int num; + int alloc; + 
UnsetAddr* us; +} UnsetAddrList; + +typedef struct { + int state; + int ref_num; + UChar* name; + UChar* name_end; + struct _Node* target; /* EffectNode : EFFECT_MEMORY */ + UnsetAddrList* unset_addr_list; +} CallNode; + +#endif + +typedef struct { + int state; + int back_num; + int back_static[NODE_BACKREFS_SIZE]; + int* back_dynamic; + int nest_level; +} BackrefNode; + +typedef struct { + int type; + struct _Node* target; + int char_len; +} AnchorNode; + +typedef struct _Node { + int type; + union { + StrNode str; + CClassNode cclass; + QuantifierNode quantifier; + EffectNode effect; +#ifdef USE_SUBEXP_CALL + CallNode call; +#endif + BackrefNode backref; + AnchorNode anchor; + struct { + struct _Node* left; + struct _Node* right; + } cons; + struct { + int type; + } ctype; + } u; +} Node; + +#define NULL_NODE ((Node* )0) + +#define SCANENV_MEMNODES_SIZE 8 +#define SCANENV_MEM_NODES(senv) \ + (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \ + (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) + +typedef struct { + OnigOptionType option; + OnigAmbigType ambig_flag; + OnigEncoding enc; + OnigSyntaxType* syntax; + BitStatusType capture_history; + BitStatusType bt_mem_start; + BitStatusType bt_mem_end; + BitStatusType backrefed_mem; + UChar* pattern; + UChar* pattern_end; + UChar* error; + UChar* error_end; + regex_t* reg; /* for reg->names only */ + int num_call; +#ifdef USE_SUBEXP_CALL + UnsetAddrList* unset_addr_list; +#endif + int num_mem; +#ifdef USE_NAMED_GROUP + int num_named; +#endif + int mem_alloc; + Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; + Node** mem_nodes_dynamic; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int num_comb_exp_check; + int comb_exp_max_regnum; + int curr_max_regnum; + int has_recursion; +#endif +} ScanEnv; + + +#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) +#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) +#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) + + +#ifdef USE_NAMED_GROUP +typedef struct { + 
int new_val; +} GroupNumRemap; + +extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); +#endif + +extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); +extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); +extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); +extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); +extern void onig_node_conv_to_str_node P_((Node* node, int raw)); +extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); +extern void onig_node_free P_((Node* node)); +extern Node* onig_node_new_effect P_((int type)); +extern Node* onig_node_new_anchor P_((int type)); +extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); +extern Node* onig_node_new_list P_((Node* left, Node* right)); +extern void onig_node_str_clear P_((Node* node)); +extern int onig_free_node_list P_((void)); +extern int onig_names_free P_((regex_t* reg)); +extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); + +#ifdef ONIG_DEBUG +#ifdef USE_NAMED_GROUP +extern int onig_print_names(FILE*, regex_t*); +#endif +#endif + +#endif /* REGPARSE_H */ diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c new file mode 100644 index 0000000..e54b5c4 --- /dev/null +++ b/ext/mbstring/oniguruma/regposerr.c @@ -0,0 +1,90 @@ +/********************************************************************** + regposerr.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "onigposix.h" + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +static char* ESTRING[] = { + NULL, + "failed to match", /* REG_NOMATCH */ + "Invalid regular expression", /* REG_BADPAT */ + "invalid collating element referenced", /* REG_ECOLLATE */ + "invalid character class type referenced", /* REG_ECTYPE */ + "bad backslash-escape sequence", /* REG_EESCAPE */ + "invalid back reference number", /* REG_ESUBREG */ + "imbalanced [ and ]", /* REG_EBRACK */ + "imbalanced ( and )", /* REG_EPAREN */ + "imbalanced { and }", /* REG_EBRACE */ + "invalid repeat range {n,m}", /* REG_BADBR */ + "invalid range", /* REG_ERANGE */ + "Out of memory", /* REG_ESPACE */ + "? 
* + not preceded by valid regular expression", /* REG_BADRPT */ + + /* Extended errors */ + "internal error", /* REG_EONIG_INTERNAL */ + "invalid wide char value", /* REG_EONIG_BADWC */ + "invalid argument", /* REG_EONIG_BADARG */ + "multi-thread error" /* REG_EONIG_THREAD */ +}; + +#include <stdio.h> + + +extern size_t +regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size) +{ + char* s; + char tbuf[35]; + size_t len; + + if (posix_ecode > 0 && posix_ecode < sizeof(ESTRING) / sizeof(ESTRING[0])) { + s = ESTRING[posix_ecode]; + } + else if (posix_ecode == 0) { + s = ""; + } + else { + sprintf(tbuf, "undefined error code (%d)", posix_ecode); + s = tbuf; + } + + len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */ + + if (buf != NULL && size > 0) { + strncpy(buf, s, size - 1); + buf[size - 1] = '\0'; + } + return len; +} diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c new file mode 100644 index 0000000..a3bacf7 --- /dev/null +++ b/ext/mbstring/oniguruma/regposix.c @@ -0,0 +1,303 @@ +/********************************************************************** + regposix.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define regex_t onig_regex_t +#include "regint.h" +#undef regex_t +#include "onigposix.h" + +#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) +#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) + +/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */ +#define ENC_STRING_LEN(enc,s,len) do { \ + if (ONIGENC_MBC_MINLEN(enc) == 1) { \ + UChar* tmps = (UChar* )(s); \ + while (*tmps != 0) tmps++; \ + len = tmps - (UChar* )(s); \ + } \ + else { \ + len = onigenc_str_bytelen_null(enc, (UChar* )s); \ + } \ +} while(0) + +typedef struct { + int onig_err; + int posix_err; +} O2PERR; + +static int +onig2posix_error_code(int code) +{ + static const O2PERR o2p[] = { + { ONIG_MISMATCH, REG_NOMATCH }, + { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, + { ONIGERR_MEMORY, REG_ESPACE }, + { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, + { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, + { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, + { 
ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, + { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, + { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, + { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, + { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, + { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, + { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, + { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, + { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, + { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, + { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, + { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, + { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, + { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, + { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, + { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { 
ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, + { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, + { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, + { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, + { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, + { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } + + }; + + int i; + + if (code >= 0) return 0; + + for (i = 0; i < sizeof(o2p) / sizeof(o2p[0]); i++) { + if (code == o2p[i].onig_err) + return o2p[i].posix_err; + } + + return REG_EONIG_INTERNAL; /* but, unknown error code */ +} + +extern int +regcomp(regex_t* reg, const char* pattern, int posix_options) +{ + int r, len; + OnigSyntaxType* syntax = OnigDefaultSyntax; + OnigOptionType options; + + if ((posix_options & REG_EXTENDED) == 0) + syntax = ONIG_SYNTAX_POSIX_BASIC; + + options = syntax->options; + if ((posix_options & REG_ICASE) != 0) + ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); + if ((posix_options & REG_NEWLINE) != 0) { + ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); + ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); + } + + reg->comp_options = posix_options; + + ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); + r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), + options, OnigEncDefaultCharEncoding, syntax, + (OnigErrorInfo* )NULL); + if (r != ONIG_NORMAL) { + return onig2posix_error_code(r); + } + + reg->re_nsub = ONIG_C(reg)->num_mem; + return 0; +} + +extern int +regexec(regex_t* reg, const char* str, size_t nmatch, + regmatch_t pmatch[], int posix_options) +{ + int r, i, len; + UChar* end; + regmatch_t* pm; + 
OnigOptionType options; + + options = ONIG_OPTION_POSIX_REGION; + if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; + if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; + + if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { + pm = (regmatch_t* )NULL; + nmatch = 0; + } + else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { + pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) + * (ONIG_C(reg)->num_mem + 1)); + if (pm == NULL) + return REG_ESPACE; + } + else { + pm = pmatch; + } + + ENC_STRING_LEN(ONIG_C(reg)->enc, str, len); + end = (UChar* )(str + len); + r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, + (OnigRegion* )pm, options); + + if (r >= 0) { + r = 0; /* Match */ + if (pm != pmatch && pm != NULL) { + xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); + } + } + else if (r == ONIG_MISMATCH) { + r = REG_NOMATCH; + for (i = 0; i < (int )nmatch; i++) + pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; + } + else { + r = onig2posix_error_code(r); + } + + if (pm != pmatch && pm != NULL) + xfree(pm); + +#if 0 + if (reg->re_nsub > nmatch - 1) + reg->re_nsub = (nmatch <= 1 ? 
0 : nmatch - 1); +#endif + + return r; +} + +extern void +regfree(regex_t* reg) +{ + onig_free(ONIG_C(reg)); +} + + +extern void +reg_set_encoding(int mb_code) +{ + OnigEncoding enc; + + switch (mb_code) { + case REG_POSIX_ENCODING_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case REG_POSIX_ENCODING_EUC_JP: + enc = ONIG_ENCODING_EUC_JP; + break; + case REG_POSIX_ENCODING_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case REG_POSIX_ENCODING_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + case REG_POSIX_ENCODING_UTF16_BE: + enc = ONIG_ENCODING_UTF16_BE; + break; + case REG_POSIX_ENCODING_UTF16_LE: + enc = ONIG_ENCODING_UTF16_LE; + break; + + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); +} + +extern int +reg_name_to_group_numbers(regex_t* reg, + const unsigned char* name, const unsigned char* name_end, int** nums) +{ + return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); +} + +typedef struct { + int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; +} i_wrap; + +static int i_wrapper(const unsigned char* name, const unsigned char* name_end, + int ng, int* gs, + onig_regex_t* reg, void* arg) +{ + i_wrap* warg = (i_wrap* )arg; + + return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); +} + +extern int +reg_foreach_name(regex_t* reg, + int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), + void* arg) +{ + i_wrap warg; + + warg.func = func; + warg.reg = reg; + warg.arg = arg; + + return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); +} + +extern int +reg_number_of_names(regex_t* reg) +{ + return onig_number_of_names(ONIG_C(reg)); +} diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c new file mode 100644 index 0000000..9114e39 --- /dev/null +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -0,0 +1,236 @@ +/********************************************************************** + regsyntax.c - Oniguruma 
(regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 */

#include "regint.h"

/*
 * Built-in syntax definitions.
 *
 * Every initializer below supplies four positional values.  Judging by the
 * constant prefixes used (ONIG_SYN_OP_*, ONIG_SYN_OP2_*, other ONIG_SYN_*
 * flags, ONIG_OPTION_*) these are presumably the op, op2, behavior and
 * options members, in that order -- NOTE(review): confirm against the
 * OnigSyntaxType declaration, which is not part of this file.
 */

/* "As is": no first-slot operators; only ONIG_SYN_OP2_INEFFECTIVE_ESCAPE. */
OnigSyntaxType OnigSyntaxASIS = {
    0
  , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
  , 0
  , ONIG_OPTION_NONE
};

/* POSIX basic: common POSIX operators plus escaped ( ) and escaped { }. */
OnigSyntaxType OnigSyntaxPosixBasic = {
  ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
    ONIG_SYN_OP_ESC_BRACE_INTERVAL )
  , 0
  , 0
  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};

/* POSIX extended: unescaped ( ), { }, +, ? and |. */
OnigSyntaxType OnigSyntaxPosixExtended = {
  ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
    ONIG_SYN_OP_BRACE_INTERVAL |
    ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
  , 0
  , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
      ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
      ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
      ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
  , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};

/* Emacs */
OnigSyntaxType OnigSyntaxEmacs = {
  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
    ONIG_SYN_OP_ESC_BRACE_INTERVAL |
    ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
    ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
    ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
    ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
  , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
  , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
  , ONIG_OPTION_NONE
};

/* grep */
OnigSyntaxType OnigSyntaxGrep = {
  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
    ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
    ONIG_SYN_OP_ESC_VBAR_ALT |
    ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
    ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
    ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
    ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
  , 0
  , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
  , ONIG_OPTION_NONE
};

/* GNU regex: built only from the SYN_GNU_REGEX_* presets. */
OnigSyntaxType OnigSyntaxGnuRegex = {
  SYN_GNU_REGEX_OP
  , 0
  , SYN_GNU_REGEX_BV
  , ONIG_OPTION_NONE
};

/* Java: GNU regex operators with \< \> masked out, plus the listed extras. */
OnigSyntaxType OnigSyntaxJava = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
      ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
  , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
  , ONIG_OPTION_SINGLELINE
};

/* Perl: GNU regex operators with \< \> masked out, plus the listed extras. */
OnigSyntaxType OnigSyntaxPerl = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
  , SYN_GNU_REGEX_BV
  , ONIG_OPTION_SINGLELINE
};

/* Perl + named group */
OnigSyntaxType OnigSyntaxPerl_NG = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
      ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
  , ONIG_OPTION_SINGLELINE
};


+ +extern int +onig_set_default_syntax(OnigSyntaxType* syntax) +{ + if (IS_NULL(syntax)) + syntax = ONIG_SYNTAX_RUBY; + + OnigDefaultSyntax = syntax; + return 0; +} + +extern void +onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) +{ + *to = *from; +} + +extern void +onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +{ + syntax->op = op; +} + +extern void +onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +{ + syntax->op2 = op2; +} + +extern void +onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +{ + syntax->behavior = behavior; +} + +extern void +onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) +{ + syntax->options = options; +} + +extern unsigned int +onig_get_syntax_op(OnigSyntaxType* syntax) +{ + return syntax->op; +} + +extern unsigned int +onig_get_syntax_op2(OnigSyntaxType* syntax) +{ + return syntax->op2; +} + +extern unsigned int +onig_get_syntax_behavior(OnigSyntaxType* syntax) +{ + return syntax->behavior; +} + +extern OnigOptionType +onig_get_syntax_options(OnigSyntaxType* syntax) +{ + return syntax->options; +} + +#ifdef USE_VARIABLE_META_CHARS +extern int onig_set_meta_char(OnigEncoding enc, + unsigned int what, OnigCodePoint code) +{ + switch (what) { + case ONIG_META_CHAR_ESCAPE: + enc->meta_char_table.esc = code; + break; + case ONIG_META_CHAR_ANYCHAR: + enc->meta_char_table.anychar = code; + break; + case ONIG_META_CHAR_ANYTIME: + enc->meta_char_table.anytime = code; + break; + case ONIG_META_CHAR_ZERO_OR_ONE_TIME: + enc->meta_char_table.zero_or_one_time = code; + break; + case ONIG_META_CHAR_ONE_OR_MORE_TIME: + enc->meta_char_table.one_or_more_time = code; + break; + case ONIG_META_CHAR_ANYCHAR_ANYTIME: + enc->meta_char_table.anychar_anytime = code; + break; + default: + return ONIGERR_INVALID_ARGUMENT; + break; + } + return 0; +} +#endif /* USE_VARIABLE_META_CHARS */ diff --git a/ext/mbstring/oniguruma/regtrav.c b/ext/mbstring/oniguruma/regtrav.c new file mode 100644 
index 0000000..58a17f5 --- /dev/null +++ b/ext/mbstring/oniguruma/regtrav.c @@ -0,0 +1,76 @@ +/********************************************************************** + regtrav.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#ifdef USE_CAPTURE_HISTORY + +static int +capture_tree_traverse(OnigCaptureTreeNode* node, int at, + int(*callback_func)(int,int,int,int,int,void*), + int level, void* arg) +{ + int r, i; + + if (node == (OnigCaptureTreeNode* )0) + return 0; + + if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { + r = (*callback_func)(node->group, node->beg, node->end, + level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); + if (r != 0) return r; + } + + for (i = 0; i < node->num_childs; i++) { + r = capture_tree_traverse(node->childs[i], at, + callback_func, level + 1, arg); + if (r != 0) return r; + } + + if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { + r = (*callback_func)(node->group, node->beg, node->end, + level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); + if (r != 0) return r; + } + + return 0; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern int +onig_capture_tree_traverse(OnigRegion* region, int at, + int(*callback_func)(int,int,int,int,int,void*), void* arg) +{ +#ifdef USE_CAPTURE_HISTORY + return capture_tree_traverse(region->history_root, at, + callback_func, 0, arg); +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c new file mode 100644 index 0000000..5fad0cc --- /dev/null +++ b/ext/mbstring/oniguruma/regversion.c @@ -0,0 +1,55 @@ +/********************************************************************** + regversion.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "oniguruma.h" +#include <stdio.h> + +extern const char* +onig_version(void) +{ + static char s[12]; + + sprintf(s, "%d.%d.%d", + ONIGURUMA_VERSION_MAJOR, + ONIGURUMA_VERSION_MINOR, + ONIGURUMA_VERSION_TEENY); + return s; +} + +extern const char* +onig_copyright(void) +{ + static char s[58]; + + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", + ONIGURUMA_VERSION_MAJOR, + ONIGURUMA_VERSION_MINOR, + ONIGURUMA_VERSION_TEENY); + return s; +} diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c new file mode 100644 index 0000000..2324da2 --- /dev/null +++ b/ext/mbstring/oniguruma/st.c @@ -0,0 +1,589 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ + +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#include <malloc.h> +#endif + +#ifdef NOT_RUBY +#include "regint.h" +#else +#ifdef RUBY_PLATFORM +#define xmalloc ruby_xmalloc +#define xcalloc ruby_xcalloc +#define xrealloc ruby_xrealloc +#define xfree ruby_xfree + +void *xmalloc(long); +void *xcalloc(long, long); +void *xrealloc(void *, long); +void xfree(void *); +#endif +#endif + +#include "st.h" + +typedef struct st_table_entry st_table_entry; + +struct st_table_entry { + unsigned int hash; + st_data_t key; + st_data_t record; + st_table_entry *next; +}; + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + + /* + * DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ + +static int numcmp(long, long); +static int numhash(long); +static struct st_hash_type type_numhash = { + numcmp, + numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static int strhash(const char *); +static struct st_hash_type type_strhash = { + strcmp, + strhash, +}; + +static void rehash(st_table *); + +#define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) +#define Calloc(n,s) (char*)xcalloc((n),(s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) + +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* +Table of prime numbers 2^n+a, 2<=n<=30. 
+*/ +static const long primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; + +static int +new_size(size) + int size; +{ + int i; + +#if 0 + for (i=3; i<31; i++) { + if ((1<<i) > size) return 1<<i; + } + return -1; +#else + int newsize; + + for (i = 0, newsize = MINSIZE; + i < (int )(sizeof(primes)/sizeof(primes[0])); + i++, newsize <<= 1) + { + if (newsize > size) return primes[i]; + } + /* Ran out of polynomials */ + return -1; /* should raise exception */ +#endif +} + +#ifdef HASH_LOG +static int collision = 0; +static int init_st = 0; + +static void +stat_col() +{ + FILE *f = fopen("/tmp/col", "w"); + fprintf(f, "collision: %d\n", collision); + fclose(f); +} +#endif + +st_table* +st_init_table_with_size(type, size) + struct st_hash_type *type; + int size; +{ + st_table *tbl; + +#ifdef HASH_LOG + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + size = new_size(size); /* round up to prime number */ + + tbl = alloc(st_table); + tbl->type = type; + tbl->num_entries = 0; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + + return tbl; +} + +st_table* +st_init_table(type) + struct st_hash_type *type; +{ + return st_init_table_with_size(type, 0); +} + +st_table* +st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table* +st_init_numtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table* +st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table* +st_init_strtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_strhash, size); +} + 
+void +st_free_table(table) + st_table *table; +{ + register st_table_entry *ptr, *next; + int i; + + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + free(table->bins); + free(table); +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#ifdef HASH_LOG +#define COLLISION collision++ +#else +#define COLLISION +#endif + +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) + +int +st_lookup(table, key, value) + st_table *table; + register st_data_t key; + st_data_t *value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } +} + +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) + +int +st_insert(table, key, value) + register st_table *table; + register st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + 
ptr->record = value; + return 1; + } +} + +void +st_add_direct(table, key, value) + st_table *table; + st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} + +static void +rehash(table) + register st_table *table; +{ + register st_table_entry *ptr, *next, **new_bins; + int i, old_num_bins = table->num_bins, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(old_num_bins+1); + new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); + + for(i = 0; i < old_num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + ptr = next; + } + } + free(table->bins); + table->num_bins = new_num_bins; + table->bins = new_bins; +} + +st_table* +st_copy(old_table) + st_table *old_table; +{ + st_table *new_table; + st_table_entry *ptr, *entry; + int i, num_bins = old_table->num_bins; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + + if (new_table->bins == 0) { + free(new_table); + return 0; + } + + for(i = 0; i < num_bins; i++) { + new_table->bins[i] = 0; + ptr = old_table->bins[i]; + while (ptr != 0) { + entry = alloc(st_table_entry); + if (entry == 0) { + free(new_table->bins); + free(new_table); + return 0; + } + *entry = *ptr; + entry->next = new_table->bins[i]; + new_table->bins[i] = entry; + ptr = ptr->next; + } + } + return new_table; +} + +int +st_delete(table, key, value) + register st_table *table; + register st_data_t *key; + st_data_t *value; +{ + unsigned int hash_val; + st_table_entry *tmp; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if 
(value != 0) *value = 0; + return 0; + } + + if (EQUAL(table, *key, ptr->key)) { + table->bins[hash_val] = ptr->next; + table->num_entries--; + if (value != 0) *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + + for(; ptr->next != 0; ptr = ptr->next) { + if (EQUAL(table, ptr->next->key, *key)) { + tmp = ptr->next; + ptr->next = ptr->next->next; + table->num_entries--; + if (value != 0) *value = tmp->record; + *key = tmp->key; + free(tmp); + return 1; + } + } + + return 0; +} + +int +st_delete_safe(table, key, value, never) + register st_table *table; + register st_data_t *key; + st_data_t *value; + st_data_t never; +{ + unsigned int hash_val; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } + + for(; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + table->num_entries--; + *key = ptr->key; + if (value != 0) *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + return 0; +} + +static int +delete_never(key, value, never) + st_data_t key, value, never; +{ + if (value == never) return ST_DELETE; + return ST_CONTINUE; +} + +void +st_cleanup_safe(table, never) + st_table *table; + st_data_t never; +{ + int num_entries = table->num_entries; + + st_foreach(table, delete_never, never); + table->num_entries = num_entries; +} + +int +st_foreach(table, func, arg) + st_table *table; + int (*func)(); + st_data_t arg; +{ + st_table_entry *ptr, *last, *tmp; + enum st_retval retval; + int i; + + for(i = 0; i < table->num_bins; i++) { + last = 0; + for(ptr = table->bins[i]; ptr != 0;) { + retval = (*func)(ptr->key, ptr->record, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + tmp = 0; + if (i < table->num_bins) { + for (tmp = table->bins[i]; tmp; tmp=tmp->next) { + if (tmp == ptr) break; + } + } + if (!tmp) { + /* call func with 
error notice */ + return 1; + } + /* fall through */ + case ST_CONTINUE: + last = ptr; + ptr = ptr->next; + break; + case ST_STOP: + return 0; + case ST_DELETE: + tmp = ptr; + if (last == 0) { + table->bins[i] = ptr->next; + } + else { + last->next = ptr->next; + } + ptr = ptr->next; + free(tmp); + table->num_entries--; + } + } + } + return 0; +} + +static int +strhash(string) + register const char *string; +{ + register int c; + +#ifdef HASH_ELFHASH + register unsigned int h = 0, g; + + while ((c = *string++) != '\0') { + h = ( h << 4 ) + c; + if ( g = h & 0xF0000000 ) + h ^= g >> 24; + h &= ~g; + } + return h; +#elif HASH_PERL + register int val = 0; + + while ((c = *string++) != '\0') { + val += c; + val += (val << 10); + val ^= (val >> 6); + } + val += (val << 3); + val ^= (val >> 11); + + return val + (val << 15); +#else + register int val = 0; + + while ((c = *string++) != '\0') { + val = val*997 + c; + } + + return val + (val>>5); +#endif +} + +static int +numcmp(x, y) + long x, y; +{ + return x != y; +} + +static int +numhash(n) + long n; +{ + return n; +} diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h new file mode 100644 index 0000000..da65e7f --- /dev/null +++ b/ext/mbstring/oniguruma/st.h @@ -0,0 +1,63 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ + +/* @(#) st.h 5.1 89/12/14 */ + +#ifndef ST_INCLUDED + +#define ST_INCLUDED + +typedef unsigned long st_data_t; +#define ST_DATA_T_DEFINED + +typedef struct st_table st_table; + +struct st_hash_type { + int (*compare)(); + int (*hash)(); +}; + +struct st_table { + struct st_hash_type *type; + int num_bins; + int num_entries; + struct st_table_entry **bins; +}; + +#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) + +enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + +#ifndef _ +# define _(args) args +#endif +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... 
+# else +# define ANYARGS +# endif +#endif + +st_table *st_init_table _((struct st_hash_type *)); +st_table *st_init_table_with_size _((struct st_hash_type *, int)); +st_table *st_init_numtable _((void)); +st_table *st_init_numtable_with_size _((int)); +st_table *st_init_strtable _((void)); +st_table *st_init_strtable_with_size _((int)); +int st_delete _((st_table *, st_data_t *, st_data_t *)); +int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); +int st_insert _((st_table *, st_data_t, st_data_t)); +int st_lookup _((st_table *, st_data_t, st_data_t *)); +int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +void st_add_direct _((st_table *, st_data_t, st_data_t)); +void st_free_table _((st_table *)); +void st_cleanup_safe _((st_table *, st_data_t)); +st_table *st_copy _((st_table *)); + +#define ST_NUMCMP ((int (*)()) 0) +#define ST_NUMHASH ((int (*)()) -2) + +#define st_numcmp ST_NUMCMP +#define st_numhash ST_NUMHASH + +#endif /* ST_INCLUDED */ diff --git a/ext/mbstring/oniguruma/win32/config.h b/ext/mbstring/oniguruma/win32/config.h new file mode 100644 index 0000000..bdbdaf2 --- /dev/null +++ b/ext/mbstring/oniguruma/win32/config.h @@ -0,0 +1,84 @@ +#define STDC_HEADERS 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_FLOAT_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 0 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 4 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define HAVE_PROTOTYPES 1 +#define TOKEN_PASTE(x,y) x##y +#define HAVE_STDARG_PROTOTYPES 1 +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define STDC_HEADERS 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_LIMITS_H 1 +#define HAVE_FCNTL_H 1 
+#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define HAVE_STRUCT_STAT_ST_RDEV 1 +#define HAVE_ST_RDEV 1 +#define GETGROUPS_T int +#define RETSIGTYPE void +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MEMCMP 1 +#define HAVE_MEMMOVE 1 +#define HAVE_MKDIR 1 +#define HAVE_STRCASECMP 1 +#define HAVE_STRNCASECMP 1 +#define HAVE_STRERROR 1 +#define HAVE_STRFTIME 1 +#define HAVE_STRCHR 1 +#define HAVE_STRSTR 1 +#define HAVE_STRTOD 1 +#define HAVE_STRTOL 1 +#define HAVE_STRTOUL 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_FMOD 1 +#define HAVE_FREXP 1 +#define HAVE_HYPOT 1 +#define HAVE_MODF 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE__SETJMP 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_MKTIME 1 +#define HAVE_COSH 1 +#define HAVE_SINH 1 +#define HAVE_TANH 1 +#define HAVE_EXECVE 1 +#define HAVE_TZNAME 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" |