diff options
author | SVN Migration <svn@php.net> | 2006-10-15 21:09:28 +0000 |
---|---|---|
committer | SVN Migration <svn@php.net> | 2006-10-15 21:09:28 +0000 |
commit | 88ec761548b66f58acc1a86cdd0fc164ca925476 (patch) | |
tree | d0af978fa00d83bb1d82c613f66477fbd6bb18aa /ext/mbstring/oniguruma/enc | |
parent | 268984b4787e797db6054313fc9ba3b9e845306e (diff) | |
download | php-git-PECL_OPENSSL.tar.gz |
This commit was manufactured by cvs2svn to create branch 'PECL_OPENSSL'.PECL_OPENSSL
Diffstat (limited to 'ext/mbstring/oniguruma/enc')
31 files changed, 0 insertions, 15162 deletions
diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c deleted file mode 100644 index 64be21d7ff..0000000000 --- a/ext/mbstring/oniguruma/enc/ascii.c +++ /dev/null @@ -1,67 +0,0 @@ -/********************************************************************** - ascii.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static int -ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingASCII = { - onigenc_single_byte_mbc_enc_len, - "US-ASCII", /* name */ - 1, /* max byte length */ - 1, /* min byte length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - ascii_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c deleted file mode 100644 index 86792666a4..0000000000 --- a/ext/mbstring/oniguruma/enc/big5.c +++ /dev/null @@ -1,168 +0,0 @@ -/********************************************************************** - big5.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static const int EncLen_BIG5[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static int -big5_mbc_enc_len(const UChar* p) -{ - return EncLen_BIG5[*p]; -} - -static OnigCodePoint -big5_mbc_to_code(const UChar* p, const UChar* end) -{ - return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); -} - -static int -big5_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); -} - -static int -big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag, - pp, end, lower); -} - -static int -big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); -} - -static int -big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); -} - -static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 -}; - -#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) -#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] - -static UChar* -big5_left_adjust_char_head(const UChar* start, const UChar* s) -{ - const UChar *p; - int len; - - if (s <= start) return (UChar* )s; - p = s; - - if (BIG5_ISMB_TRAIL(*p)) { - while (p > start) { - if (! BIG5_ISMB_FIRST(*--p)) { - p++; - break; - } - } - } - len = enc_len(ONIG_ENCODING_BIG5, p); - if (p + len > s) return (UChar* )p; - p += len; - return (UChar* )(p + ((s - p) & ~1)); -} - -static int -big5_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - const UChar c = *s; - - return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE); -} - -OnigEncodingType OnigEncodingBIG5 = { - big5_mbc_enc_len, - "Big5", /* name */ - 2, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - big5_mbc_to_code, - onigenc_mb2_code_to_mbclen, - big5_code_to_mbc, - big5_mbc_to_normalize, - big5_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - big5_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - big5_left_adjust_char_head, - big5_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c deleted file mode 100644 index 71c81ee9fe..0000000000 --- a/ext/mbstring/oniguruma/enc/euc_jp.c +++ /dev/null @@ -1,228 +0,0 @@ -/********************************************************************** - euc_jp.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) - -static const int EncLen_EUCJP[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static int -eucjp_mbc_enc_len(const UChar* p) -{ - return EncLen_EUCJP[*p]; -} - -static OnigCodePoint -eucjp_mbc_to_code(const UChar* p, const UChar* end) -{ - int c, i, len; - OnigCodePoint n; - - len = enc_len(ONIG_ENCODING_EUC_JP, p); - n = (OnigCodePoint )*p++; - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - return n; -} - -static int -eucjp_code_to_mbclen(OnigCodePoint code) -{ - if (ONIGENC_IS_CODE_ASCII(code)) return 1; - else if ((code & 0xff0000) != 0) return 3; - else if ((code & 0xff00) != 0) return 2; - else return 0; -} - -#if 0 -static int -eucjp_code_to_mbc_first(OnigCodePoint code) -{ - int first; - - if ((code & 0xff0000) != 0) { - first = (code >> 16) & 0xff; - } - else if ((code & 0xff00) != 0) { - first = (code >> 8) & 0xff; - } - else { - return (int )code; - } - return first; -} -#endif - -static int -eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); - if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); - *p++ = (UChar )(code & 0xff); - -#if 1 - if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) - return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; -#endif - return p - buf; -} - -static int -eucjp_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - int len; - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp)++; - return 1; - } - else { - len = enc_len(ONIG_ENCODING_EUC_JP, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end); -} - -static int -eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { - return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -eucjp_left_adjust_char_head(const UChar* start, const UChar* s) -{ - /* In this encoding - mb-trail bytes doesn't mix with single bytes. - */ - const UChar *p; - int len; - - if (s <= start) return (UChar* )s; - p = s; - - while (!eucjp_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_JP, p); - if (p + len > s) return (UChar* )p; - p += len; - return (UChar* )(p + ((s - p) & ~1)); -} - -static int -eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - const UChar c = *s; - if (c <= 0x7e || c == 0x8e || c == 0x8f) - return TRUE; - else - return FALSE; -} - -OnigEncodingType OnigEncodingEUC_JP = { - eucjp_mbc_enc_len, - "EUC-JP", /* name */ - 3, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - eucjp_mbc_to_code, - eucjp_code_to_mbclen, - eucjp_code_to_mbc, - eucjp_mbc_to_normalize, - eucjp_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - eucjp_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - eucjp_left_adjust_char_head, - eucjp_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c deleted file mode 100644 index 57bf801536..0000000000 --- a/ext/mbstring/oniguruma/enc/euc_kr.c +++ /dev/null @@ -1,173 +0,0 @@ -/********************************************************************** - euc_kr.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static const int EncLen_EUCKR[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static int -euckr_mbc_enc_len(const UChar* p) -{ - return EncLen_EUCKR[*p]; -} - -static OnigCodePoint -euckr_mbc_to_code(const UChar* p, const UChar* end) -{ - return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); -} - -static int -euckr_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); -} - -static int -euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag, - pp, end, lower); -} - -static int -euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); -} - -static int -euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); -} - -#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) - -static UChar* -euckr_left_adjust_char_head(const UChar* start, const UChar* s) -{ - /* Assumed in this encoding, - mb-trail bytes don't mix with single bytes. - */ - const UChar *p; - int len; - - if (s <= start) return (UChar* )s; - p = s; - - while (!euckr_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_KR, p); - if (p + len > s) return (UChar* )p; - p += len; - return (UChar* )(p + ((s - p) & ~1)); -} - -static int -euckr_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - const UChar c = *s; - if (c <= 0x7e) return TRUE; - else return FALSE; -} - -OnigEncodingType OnigEncodingEUC_KR = { - euckr_mbc_enc_len, - "EUC-KR", /* name */ - 2, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - euckr_mbc_to_code, - onigenc_mb2_code_to_mbclen, - euckr_code_to_mbc, - euckr_mbc_to_normalize, - euckr_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - euckr_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - euckr_left_adjust_char_head, - euckr_is_allowed_reverse_match -}; - -/* Same with OnigEncodingEUC_KR except the name */ -OnigEncodingType OnigEncodingEUC_CN = { - euckr_mbc_enc_len, - "EUC-CN", /* name */ - 2, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - euckr_mbc_to_code, - onigenc_mb2_code_to_mbclen, - euckr_code_to_mbc, - euckr_mbc_to_normalize, - euckr_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - euckr_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - euckr_left_adjust_char_head, - euckr_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c deleted file mode 100644 index 6f396e75e6..0000000000 --- a/ext/mbstring/oniguruma/enc/euc_tw.c +++ /dev/null @@ -1,144 +0,0 @@ -/********************************************************************** - euc_tw.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static const int EncLen_EUCTW[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static int -euctw_mbc_enc_len(const UChar* p) -{ - return EncLen_EUCTW[*p]; -} - -static OnigCodePoint -euctw_mbc_to_code(const UChar* p, const UChar* end) -{ - return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); -} - -static int -euctw_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); -} - -static int -euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag, - pp, end, lower); -} - -static int -euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end); -} - -static int -euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); -} - -#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff) - -static UChar* -euctw_left_adjust_char_head(const UChar* start, const UChar* s) -{ - /* Assumed in this encoding, - mb-trail bytes don't mix with single bytes. - */ - const UChar *p; - int len; - - if (s <= start) return (UChar* )s; - p = s; - - while (!euctw_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_TW, p); - if (p + len > s) return (UChar* )p; - p += len; - return (UChar* )(p + ((s - p) & ~1)); -} - -static int -euctw_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - const UChar c = *s; - if (c <= 0x7e) return TRUE; - else return FALSE; -} - -OnigEncodingType OnigEncodingEUC_TW = { - euctw_mbc_enc_len, - "EUC-TW", /* name */ - 4, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - euctw_mbc_to_code, - onigenc_mb4_code_to_mbclen, - euctw_code_to_mbc, - euctw_mbc_to_normalize, - euctw_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - euctw_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - euctw_left_adjust_char_head, - euctw_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c deleted file mode 100644 index 01995ea094..0000000000 --- a/ext/mbstring/oniguruma/enc/gb18030.c +++ /dev/null @@ -1,501 +0,0 @@ -/********************************************************************** - gb18030.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2005 KUBO Takehiro <kubo AT jiubao DOT org> - * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#if 1 -#define DEBUG_GB18030(arg) -#else -#define DEBUG_GB18030(arg) printf arg -#endif - -enum { - C1, /* one-byte char */ - C2, /* one-byte or second of two-byte char */ - C4, /* one-byte or second or fourth of four-byte char */ - CM /* first of two- or four-byte char or second of two-byte char */ -}; - -static const char GB18030_MAP[] = { - C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, - C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, - C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, - C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, - C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, - C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, - C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, - C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, - C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, - CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 -}; - -static int -gb18030_mbc_enc_len(const UChar* p) -{ - if (GB18030_MAP[*p] != CM) - return 1; - p++; - if (GB18030_MAP[*p] == C4) - return 4; - if (GB18030_MAP[*p] == C1) - return 1; /* illegal sequence */ - return 2; -} - -static OnigCodePoint -gb18030_mbc_to_code(const UChar* p, const UChar* end) -{ - return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end); -} - -static int -gb18030_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf); -} - -static int -gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag, - pp, end, lower); -} - -static int -gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); -} - -static int -gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype); -} - -enum state { - S_START, - S_one_C2, - S_one_C4, - S_one_CM, - - S_odd_CM_one_CX, - S_even_CM_one_CX, - - /* CMC4 : pair of "CM C4" */ - S_one_CMC4, - S_odd_CMC4, - S_one_C4_odd_CMC4, - S_even_CMC4, - S_one_C4_even_CMC4, - - S_odd_CM_odd_CMC4, - S_even_CM_odd_CMC4, - - S_odd_CM_even_CMC4, - S_even_CM_even_CMC4, - - /* C4CM : pair of "C4 CM" */ - S_odd_C4CM, - S_one_CM_odd_C4CM, - S_even_C4CM, - S_one_CM_even_C4CM, - - S_even_CM_odd_C4CM, - S_odd_CM_odd_C4CM, - S_even_CM_even_C4CM, - S_odd_CM_even_C4CM, -}; - -static UChar* -gb18030_left_adjust_char_head(const UChar* start, const UChar* s) -{ - const UChar *p; - enum state state = S_START; - - DEBUG_GB18030(("----------------\n")); - for (p = s; p >= start; p--) { - DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); - switch (state) { - case S_START: - switch (GB18030_MAP[*p]) { - case C1: - return (UChar *)s; - case C2: - state = S_one_C2; /* C2 */ - break; - case C4: - state = S_one_C4; /* C4 */ - break; - case CM: - state = S_one_CM; /* CM */ - break; - } - break; - case S_one_C2: /* C2 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)s; - case CM: - state = S_odd_CM_one_CX; /* CM C2 */ - break; - } - break; - case S_one_C4: /* C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)s; - case CM: - state = S_one_CMC4; - break; - } - break; - case S_one_CM: /* CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)s; - case C4: - state = S_odd_C4CM; - break; - case CM: - state = S_odd_CM_one_CX; /* CM CM */ - break; - } - break; - - case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 1); - case CM: - state = S_even_CM_one_CX; - break; - } - break; - case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)s; - case CM: - state = S_odd_CM_one_CX; - break; - } - break; - - case S_one_CMC4: /* CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)(s - 1); - case C4: - state = S_one_C4_odd_CMC4; /* C4 CM C4 */ - break; - case CM: - state = S_even_CM_one_CX; /* CM CM C4 */ - break; - } - break; - case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)(s - 1); - case C4: - state = S_one_C4_odd_CMC4; - break; - case CM: - state = S_odd_CM_odd_CMC4; - break; - } - break; - case S_one_C4_odd_CMC4: /* C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 1); - case CM: - state = S_even_CMC4; /* CM C4 CM C4 */ - break; - } - break; - case S_even_CMC4: /* CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)(s - 3); - case C4: - state = S_one_C4_even_CMC4; - break; - case CM: - state = S_odd_CM_even_CMC4; - break; - } - break; - case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 3); - case CM: - state = S_odd_CMC4; - break; - } - break; - - case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 3); - case CM: - state = S_even_CM_odd_CMC4; - break; - } - break; - case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 1); - case CM: - state = S_odd_CM_odd_CMC4; - break; - } - break; - - case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 1); - case CM: - state = S_even_CM_even_CMC4; - break; - } - break; - case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 3); - case CM: - state = S_odd_CM_even_CMC4; - break; - } - break; - - case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)s; - case CM: - state = S_one_CM_odd_C4CM; /* CM C4 CM */ - break; - } - break; - case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)(s - 2); /* |CM C4 CM */ - case C4: - state = S_even_C4CM; - break; - case CM: - state = S_even_CM_odd_C4CM; - break; - } - break; - case S_even_C4CM: /* C4 CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 2); /* C4|CM C4 CM */ - case CM: - state = S_one_CM_even_C4CM; - break; - } - break; - case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ - case C4: - state = S_odd_C4CM; - break; - case CM: - state = S_even_CM_even_C4CM; - break; - } - break; - - case S_even_CM_odd_C4CM: /* CM CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 0); /* |CM CM|C4|CM */ - case CM: - state = S_odd_CM_odd_C4CM; - break; - } - break; - case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ - case CM: - state = S_even_CM_odd_C4CM; - break; - } - break; - - case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ - case CM: - state = S_odd_CM_even_C4CM; - break; - } - break; - case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ - switch (GB18030_MAP[*p]) { - case C1: - case C2: - case C4: - return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ - case CM: - state = S_even_CM_even_C4CM; - break; - } - break; - } - } - - DEBUG_GB18030(("state %d\n", state)); - switch (state) { - case S_START: return (UChar *)(s - 0); - case S_one_C2: return (UChar *)(s - 0); - case S_one_C4: return (UChar *)(s - 0); - case S_one_CM: return (UChar *)(s - 0); - - case S_odd_CM_one_CX: return (UChar *)(s - 1); - case S_even_CM_one_CX: return (UChar *)(s - 0); - - case S_one_CMC4: return (UChar *)(s - 1); - case S_odd_CMC4: return (UChar *)(s - 1); - case S_one_C4_odd_CMC4: return (UChar *)(s - 1); - case S_even_CMC4: return (UChar *)(s - 3); - case S_one_C4_even_CMC4: return (UChar *)(s - 3); - - case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); - case S_even_CM_odd_CMC4: return (UChar *)(s - 1); - - case S_odd_CM_even_CMC4: return (UChar *)(s - 1); - case S_even_CM_even_CMC4: return (UChar *)(s - 3); - - case S_odd_C4CM: return (UChar *)(s - 0); - case S_one_CM_odd_C4CM: return (UChar *)(s - 2); - case S_even_C4CM: return (UChar *)(s - 2); - case S_one_CM_even_C4CM: return (UChar *)(s - 0); - - case S_even_CM_odd_C4CM: return (UChar *)(s - 0); - case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); - case S_even_CM_even_C4CM: return (UChar *)(s - 2); - case S_odd_CM_even_C4CM: return (UChar *)(s - 0); - } - - return (UChar* )s; /* never come here. (escape warning) */ -} - -static int -gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - return GB18030_MAP[*s] == C1 ? TRUE : FALSE; -} - -OnigEncodingType OnigEncodingGB18030 = { - gb18030_mbc_enc_len, - "GB18030", /* name */ - 4, /* max enc length */ - 1, /* min enc length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - gb18030_mbc_to_code, - onigenc_mb4_code_to_mbclen, - gb18030_code_to_mbc, - gb18030_mbc_to_normalize, - gb18030_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - gb18030_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - gb18030_left_adjust_char_head, - gb18030_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c deleted file mode 100644 index 4dd708d841..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_1.c +++ /dev/null @@ -1,178 +0,0 @@ -/********************************************************************** - iso8859_1.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ - ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) - -static const unsigned short EncISO_8859_1_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_1_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_1_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingISO_8859_1 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-1", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_1_mbc_to_normalize, - iso_8859_1_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_1_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c deleted file mode 100644 index e317f49752..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_10.c +++ /dev/null @@ -1,327 +0,0 @@ -/********************************************************************** - iso8859_10.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ - ((EncISO_8859_10_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247', - '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_10_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_10_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_10_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_10_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa2, 0xb2 }, - { 0xa3, 0xb3 }, - { 0xa4, 0xb4 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa8, 0xb8 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - { 0xaf, 0xbf }, - - { 0xb1, 0xa1 }, - { 0xb2, 0xa2 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xa4 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_10 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-10", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_10_mbc_to_normalize, - iso_8859_10_is_mbc_ambiguous, - iso_8859_10_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_10_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c deleted file mode 100644 index 6afaa27f41..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_11.c +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** - iso8859_11.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ - ((EncISO_8859_11_CtypeTable[code] & ctype) != 0) - -static const unsigned short EncISO_8859_11_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -static int -iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_11_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingISO_8859_11 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-11", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_11_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c deleted file mode 100644 index abd7644527..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_13.c +++ /dev/null @@ -1,297 +0,0 @@ -/********************************************************************** - iso8859_13.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ - ((EncISO_8859_13_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_13_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0, - 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0 -}; - -static int -iso_8859_13_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_13_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xb5 are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xb5) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_13_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_13 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-13", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_13_mbc_to_normalize, - iso_8859_13_is_mbc_ambiguous, - iso_8859_13_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_13_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c deleted file mode 100644 index d76771a1cf..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_14.c +++ /dev/null @@ -1,327 +0,0 @@ -/********************************************************************** - iso8859_14.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ - ((EncISO_8859_14_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247', - '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377', - '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271', - '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_14_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0, - 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2, - 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2, - 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_14_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_14_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_14_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa4, 0xa5 }, - { 0xa5, 0xa4 }, - { 0xa6, 0xab }, - { 0xa8, 0xb8 }, - { 0xaa, 0xba }, - { 0xab, 0xa6 }, - { 0xac, 0xbc }, - { 0xaf, 0xff }, - - { 0xb0, 0xb1 }, - { 0xb1, 0xb0 }, - { 0xb2, 0xb3 }, - { 0xb3, 0xb2 }, - { 0xb4, 0xb5 }, - { 0xb5, 0xb4 }, - { 0xb7, 0xb9 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xb7 }, - { 0xba, 0xaa }, - { 0xbb, 0xbf }, - { 0xbc, 0xac }, - { 0xbd, 0xbe }, - { 0xbe, 0xbd }, - { 0xbf, 0xbb }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xaf } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_14 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-14", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_14_mbc_to_normalize, - iso_8859_14_is_mbc_ambiguous, - iso_8859_14_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_14_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c deleted file mode 100644 index d6611ed290..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_15.c +++ /dev/null @@ -1,307 +0,0 @@ -/********************************************************************** - iso8859_15.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ - ((EncISO_8859_15_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_15_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, - 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0, - 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_15_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_15_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf etc.. are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_15_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_15 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-15", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_15_mbc_to_normalize, - iso_8859_15_is_mbc_ambiguous, - iso_8859_15_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_15_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c deleted file mode 100644 index 23b868065c..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_16.c +++ /dev/null @@ -1,321 +0,0 @@ -/********************************************************************** - iso8859_16.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ - ((EncISO_8859_16_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\242', '\242', '\263', '\245', '\245', '\250', '\247', - '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', - '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_16_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0, - 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2, - 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0, - 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_16_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_16_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_16_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa3, 0xb3 }, - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - { 0xaa, 0xba }, - { 0xac, 0xae }, - { 0xae, 0xac }, - { 0xaf, 0xbf }, - - { 0xb2, 0xb9 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xb9, 0xb2 }, - { 0xba, 0xaa }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_16 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-16", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_16_mbc_to_normalize, - iso_8859_16_is_mbc_ambiguous, - iso_8859_16_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_16_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c deleted file mode 100644 index 5f21ff78ae..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_2.c +++ /dev/null @@ -1,319 +0,0 @@ -/********************************************************************** - iso8859_2.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ - ((EncISO_8859_2_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', - '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_2_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 -}; - -static int -iso_8859_2_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_2_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf is lower case letter, but can't convert. */ - if (*p == 0xdf) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa3, 0xb3 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - { 0xaf, 0xbf }, - - { 0xb1, 0xa1 }, - { 0xb3, 0xa3 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -static int -iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_2_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingISO_8859_2 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-2", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_2_mbc_to_normalize, - iso_8859_2_is_mbc_ambiguous, - iso_8859_2_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_2_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c deleted file mode 100644 index 9ac3dab179..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_3.c +++ /dev/null @@ -1,308 +0,0 @@ -/********************************************************************** - iso8859_3.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ - ((EncISO_8859_3_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', - '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_3_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2, - 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 -}; - -static int -iso_8859_3_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_3_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xb5) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_3_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xaf, 0xbf }, - { 0xb1, 0xa1 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbf, 0xaf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_3 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-3", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_3_mbc_to_normalize, - iso_8859_3_is_mbc_ambiguous, - iso_8859_3_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_3_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c deleted file mode 100644 index c54a2fa149..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_4.c +++ /dev/null @@ -1,317 +0,0 @@ -/********************************************************************** - iso8859_4.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ - ((EncISO_8859_4_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', - '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_4_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0, - 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0, - 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0 -}; - -static int -iso_8859_4_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_4_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (*p == 0xdf || *p == 0xa2) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_4_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xb1 }, - { 0xa3, 0xb3 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - - { 0xb1, 0xa1 }, - { 0xb3, 0xa3 }, - { 0xb5, 0xa5 }, - { 0xb6, 0xa6 }, - { 0xb9, 0xa9 }, - { 0xba, 0xaa }, - { 0xbb, 0xab }, - { 0xbc, 0xac }, - { 0xbe, 0xae }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_4 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-4", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_4_mbc_to_normalize, - iso_8859_4_is_mbc_ambiguous, - iso_8859_4_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_4_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c deleted file mode 100644 index 5b941e2eb9..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_5.c +++ /dev/null @@ -1,296 +0,0 @@ -/********************************************************************** - iso8859_5.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ - ((EncISO_8859_5_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_5_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2 -}; - -static int -iso_8859_5_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_5_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_5_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xf1 }, - { 0xa2, 0xf2 }, - { 0xa3, 0xf3 }, - { 0xa4, 0xf4 }, - { 0xa5, 0xf5 }, - { 0xa6, 0xf6 }, - { 0xa7, 0xf7 }, - { 0xa8, 0xf8 }, - { 0xa9, 0xf9 }, - { 0xaa, 0xfa }, - { 0xab, 0xfb }, - { 0xac, 0xfc }, - { 0xae, 0xfe }, - { 0xaf, 0xff }, - - { 0xb0, 0xd0 }, - { 0xb1, 0xd1 }, - { 0xb2, 0xd2 }, - { 0xb3, 0xd3 }, - { 0xb4, 0xd4 }, - { 0xb5, 0xd5 }, - { 0xb6, 0xd6 }, - { 0xb7, 0xd7 }, - { 0xb8, 0xd8 }, - { 0xb9, 0xd9 }, - { 0xba, 0xda }, - { 0xbb, 0xdb }, - { 0xbc, 0xdc }, - { 0xbd, 0xdd }, - { 0xbe, 0xdf }, - { 0xbf, 0xdf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xb0 }, - { 0xd1, 0xb1 }, - { 0xd2, 0xb2 }, - { 0xd3, 0xb3 }, - { 0xd4, 0xb4 }, - { 0xd5, 0xb5 }, - { 0xd6, 0xb6 }, - { 0xd7, 0xb7 }, - { 0xd8, 0xb8 }, - { 0xd9, 0xb9 }, - { 0xda, 0xba }, - { 0xdb, 0xbb }, - { 0xdc, 0xbc }, - { 0xdd, 0xbd }, - { 0xde, 0xbe }, - { 0xdf, 0xbf }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf1, 0xa1 }, - { 0xf2, 0xa2 }, - { 0xf3, 0xa3 }, - { 0xf4, 0xa4 }, - { 0xf5, 0xa5 }, - { 0xf6, 0xa6 }, - { 0xf7, 0xa7 }, - { 0xf8, 0xa8 }, - { 0xf9, 0xa9 }, - { 0xfa, 0xaa }, - { 0xfb, 0xab }, - { 0xfc, 0xac }, - { 0xfe, 0xae }, - { 0xff, 0xaf } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_5 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-5", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_5_mbc_to_normalize, - iso_8859_5_is_mbc_ambiguous, - iso_8859_5_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_5_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c deleted file mode 100644 index bb5515d30b..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_6.c +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** - iso8859_6.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ - ((EncISO_8859_6_CtypeTable[code] & ctype) != 0) - -static const unsigned short EncISO_8859_6_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, - 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -static int -iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_6_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingISO_8859_6 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-6", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_6_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c deleted file mode 100644 index 2529dae666..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_7.c +++ /dev/null @@ -1,278 +0,0 @@ -/********************************************************************** - iso8859_7.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ - ((EncISO_8859_7_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', - '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', - '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_7_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2, - 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000 -}; - -static int -iso_8859_7_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_7_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - if (*p == 0xc0 || *p == 0xe0) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_7_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xb6, 0xdc }, - { 0xb8, 0xdd }, - { 0xb9, 0xde }, - { 0xba, 0xdf }, - { 0xbc, 0xfc }, - { 0xbe, 0xfd }, - { 0xbf, 0xfe }, - - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xb6 }, - { 0xdd, 0xb8 }, - { 0xde, 0xb9 }, - { 0xdf, 0xba }, - - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xbc }, - { 0xfd, 0xbe }, - { 0xfe, 0xbf } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_7 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-7", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_7_mbc_to_normalize, - iso_8859_7_is_mbc_ambiguous, - iso_8859_7_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_7_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c deleted file mode 100644 index d7f0fc5947..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_8.c +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************** - iso8859_8.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ - ((EncISO_8859_8_CtypeTable[code] & ctype) != 0) - -static const unsigned short EncISO_8859_8_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, - 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -static int -iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_8_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingISO_8859_8 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-8", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_to_normalize, - onigenc_ascii_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - iso_8859_8_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c deleted file mode 100644 index f4bcac1ae3..0000000000 --- a/ext/mbstring/oniguruma/enc/iso8859_9.c +++ /dev/null @@ -1,297 +0,0 @@ -/********************************************************************** - iso8859_9.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] -#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ - ((EncISO_8859_9_CtypeTable[code] & ctype) != 0) - -static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -static const unsigned short EncISO_8859_9_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static int -iso_8859_9_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncISO_8859_9_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xdf etc.. are lower case letter, but can't convert. */ - if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) - return FALSE; - else - return TRUE; - } - - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_ISO_8859_9_CTYPE(code, ctype); - else - return FALSE; -} - -static int -iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingISO_8859_9 = { - onigenc_single_byte_mbc_enc_len, - "ISO-8859-9", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - iso_8859_9_mbc_to_normalize, - iso_8859_9_is_mbc_ambiguous, - iso_8859_9_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_9_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c deleted file mode 100644 index 27f97f3072..0000000000 --- a/ext/mbstring/oniguruma/enc/koi8.c +++ /dev/null @@ -1,264 +0,0 @@ -/********************************************************************** - koi8.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c] -#define ENC_IS_KOI8_CTYPE(code,ctype) \ - ((EncKOI8_CtypeTable[code] & ctype) != 0) - -static const UChar EncKOI8_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' -}; - -static const unsigned short EncKOI8_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 -}; - -static int -koi8_mbc_to_normalize(OnigAmbigType flag, - const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower) -{ - const OnigUChar* p = *pp; - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_KOI8_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) -{ - const OnigUChar* p = *pp; - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncKOI8_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - - -static int -koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_KOI8_CTYPE(code, ctype); - else - return FALSE; -} - -static int -koi8_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - { 0xdf, 0xff }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfe, 0xde }, - { 0xff, 0xdf } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingKOI8 = { - onigenc_single_byte_mbc_enc_len, - "KOI8", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - koi8_mbc_to_normalize, - koi8_is_mbc_ambiguous, - koi8_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - koi8_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c deleted file mode 100644 index d2a4440f2c..0000000000 --- a/ext/mbstring/oniguruma/enc/koi8_r.c +++ /dev/null @@ -1,263 +0,0 @@ -/********************************************************************** - koi8_r.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] -#define ENC_IS_KOI8_R_CTYPE(code,ctype) \ - ((EncKOI8_R_CtypeTable[code] & ctype) != 0) - -static const UChar EncKOI8_R_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' -}; - -static const unsigned short EncKOI8_R_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, - 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2 -}; - -static int -koi8_r_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ -} - -static int -koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp)++; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - int v = (EncKOI8_R_CtypeTable[*p] & - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - return (v != 0 ? TRUE : FALSE); - } - return FALSE; -} - -static int -koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) - return ENC_IS_KOI8_R_CTYPE(code, ctype); - else - return FALSE; -} - -static int -koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) -{ - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, - { 0xdf, 0xff }, - - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, - - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfe, 0xde }, - { 0xff, 0xdf } - }; - - if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { - *ccs = OnigAsciiPairAmbigCodes; - return 52; - } - if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { - *ccs = cc; - return sizeof(cc) / sizeof(OnigPairAmbigCodes); - } - else - return 0; -} - -OnigEncodingType OnigEncodingKOI8_R = { - onigenc_single_byte_mbc_enc_len, - "KOI8-R", /* name */ - 1, /* max enc length */ - 1, /* min enc length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - koi8_r_mbc_to_normalize, - koi8_r_is_mbc_ambiguous, - koi8_r_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - koi8_r_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c deleted file mode 100644 index 6b9ef4c5b5..0000000000 --- a/ext/mbstring/oniguruma/enc/mktable.c +++ /dev/null @@ -1,1119 +0,0 @@ -/********************************************************************** - mktable.c -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <stdio.h> - -#define NOT_RUBY -#include "regenc.h" - -#define UNICODE_ISO_8859_1 0 -#define ISO_8859_1 1 -#define ISO_8859_2 2 -#define ISO_8859_3 3 -#define ISO_8859_4 4 -#define ISO_8859_5 5 -#define ISO_8859_6 6 -#define ISO_8859_7 7 -#define ISO_8859_8 8 -#define ISO_8859_9 9 -#define ISO_8859_10 10 -#define ISO_8859_11 11 -#define ISO_8859_13 12 -#define ISO_8859_14 13 -#define ISO_8859_15 14 -#define ISO_8859_16 15 -#define KOI8 16 -#define KOI8_R 17 - -typedef struct { - int num; - char* name; -} ENC_INFO; - -static ENC_INFO Info[] = { - { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" }, - { ISO_8859_1, "ISO_8859_1" }, - { ISO_8859_2, "ISO_8859_2" }, - { ISO_8859_3, "ISO_8859_3" }, - { ISO_8859_4, "ISO_8859_4" }, - { ISO_8859_5, "ISO_8859_5" }, - { ISO_8859_6, "ISO_8859_6" }, - { ISO_8859_7, "ISO_8859_7" }, - { ISO_8859_8, "ISO_8859_8" }, - { ISO_8859_9, "ISO_8859_9" }, - { ISO_8859_10, "ISO_8859_10" }, - { ISO_8859_11, "ISO_8859_11" }, - { ISO_8859_13, "ISO_8859_13" }, - { ISO_8859_14, "ISO_8859_14" }, - { ISO_8859_15, "ISO_8859_15" }, - { ISO_8859_16, "ISO_8859_16" }, - { KOI8, "KOI8" }, - { KOI8_R, "KOI8_R" } -}; - - -static int IsAlpha(int enc, int c) -{ - if (c >= 0x41 && c <= 0x5a) return 1; - if (c >= 0x61 && c <= 0x7a) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_9: - if (c == 0xaa) return 1; - if (c == 0xb5) return 1; - if (c == 0xba) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - break; - - case ISO_8859_2: - if (c == 0xa1 || c == 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c >= 0xae && c <= 0xaf) return 1; - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_3: - if (c == 0xa1) return 1; - if (c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xaf) return 1; - if (c == 0xb1) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c == 0xbf) return 1; - if (c >= 0xc0 && c <= 0xc2) return 1; - if (c >= 0xc4 && c <= 0xcf) return 1; - if (c >= 0xd1 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xe2) return 1; - if (c >= 0xe4 && c <= 0xef) return 1; - if (c >= 0xf1 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_4: - if (c >= 0xa1 && c <= 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xae) return 1; - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_5: - if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; - if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; - break; - - case ISO_8859_6: - if (c >= 0xc1 && c <= 0xda) return 1; - if (c >= 0xe0 && c <= 0xf2) return 1; - break; - - case ISO_8859_7: - if (c == 0xb6) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c == 0xc0) return 1; - if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; - if (c >= 0xdc && c <= 0xfe) return 1; - break; - - case ISO_8859_8: - if (c == 0xb5) return 1; - if (c >= 0xe0 && c <= 0xfa) return 1; - break; - - case ISO_8859_10: - if (c >= 0xa1 && c <= 0xa6) return 1; - if (c >= 0xa8 && c <= 0xac) return 1; - if (c == 0xae || c == 0xaf) return 1; - if (c >= 0xb1 && c <= 0xb6) return 1; - if (c >= 0xb8 && c <= 0xbc) return 1; - if (c >= 0xbe && c <= 0xff) return 1; - break; - - case ISO_8859_11: - if (c >= 0xa1 && c <= 0xda) return 1; - if (c >= 0xdf && c <= 0xfb) return 1; - break; - - case ISO_8859_13: - if (c == 0xa8) return 1; - if (c == 0xaa) return 1; - if (c == 0xaf) return 1; - if (c == 0xb5) return 1; - if (c == 0xb8) return 1; - if (c == 0xba) return 1; - if (c >= 0xbf && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_14: - if (c == 0xa1 || c == 0xa2) return 1; - if (c == 0xa4 || c == 0xa5) return 1; - if (c == 0xa6 || c == 0xa8) return 1; - if (c >= 0xaa && c <= 0xac) return 1; - if (c >= 0xaf && c <= 0xb5) return 1; - if (c >= 0xb7 && c <= 0xff) return 1; - break; - - case ISO_8859_15: - if (c == 0xaa) return 1; - if (c == 0xb5) return 1; - if (c == 0xba) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - if (c == 0xa6) return 1; - if (c == 0xa8) return 1; - if (c == 0xb4) return 1; - if (c == 0xb8) return 1; - if (c == 0xbc) return 1; - if (c == 0xbd) return 1; - if (c == 0xbe) return 1; - break; - - case ISO_8859_16: - if (c == 0xa1) return 1; - if (c == 0xa2) return 1; - if (c == 0xa3) return 1; - if (c == 0xa6) return 1; - if (c == 0xa8) return 1; - if (c == 0xaa) return 1; - if (c == 0xac) return 1; - if (c == 0xae) return 1; - if (c == 0xaf) return 1; - if (c == 0xb2) return 1; - if (c == 0xb3) return 1; - if (c == 0xb4) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbc) return 1; - if (c == 0xbd) return 1; - if (c == 0xbe) return 1; - if (c == 0xbf) return 1; - if (c >= 0xc0 && c <= 0xde) return 1; - if (c >= 0xdf && c <= 0xff) return 1; - break; - - case KOI8_R: - if (c == 0xa3 || c == 0xb3) return 1; - /* fall */ - case KOI8: - if (c >= 0xc0 && c <= 0xff) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsBlank(int enc, int c) -{ - if (c == 0x09 || c == 0x20) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_2: - case ISO_8859_3: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_6: - case ISO_8859_7: - case ISO_8859_8: - case ISO_8859_9: - case ISO_8859_10: - case ISO_8859_11: - case ISO_8859_13: - case ISO_8859_14: - case ISO_8859_15: - case ISO_8859_16: - case KOI8: - if (c == 0xa0) return 1; - break; - - case KOI8_R: - if (c == 0x9a) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsCntrl(int enc, int c) -{ - if (c >= 0x00 && c <= 0x1F) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - if (c == 0xad) return 1; - /* fall */ - case ISO_8859_1: - case ISO_8859_2: - case ISO_8859_3: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_6: - case ISO_8859_7: - case ISO_8859_8: - case ISO_8859_9: - case ISO_8859_10: - case ISO_8859_11: - case ISO_8859_13: - case ISO_8859_14: - case ISO_8859_15: - case ISO_8859_16: - case KOI8: - if (c >= 0x7f && c <= 0x9F) return 1; - break; - - - case KOI8_R: - if (c == 0x7f) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsDigit(int enc, int c) -{ - if (c >= 0x30 && c <= 0x39) return 1; - return 0; -} - -static int IsGraph(int enc, int c) -{ - if (c >= 0x21 && c <= 0x7e) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_2: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_9: - case ISO_8859_10: - case ISO_8859_13: - case ISO_8859_14: - case ISO_8859_15: - case ISO_8859_16: - if (c >= 0xa1 && c <= 0xff) return 1; - break; - - case ISO_8859_3: - if (c >= 0xa1) { - if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || - c == 0xe3 || c == 0xf0) - return 0; - else - return 1; - } - break; - - case ISO_8859_6: - if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) - return 1; - if (c >= 0xc1 && c <= 0xda) return 1; - if (c >= 0xe0 && c <= 0xf2) return 1; - break; - - case ISO_8859_7: - if (c >= 0xa1 && c <= 0xfe && - c != 0xa4 && c != 0xa5 && c != 0xaa && - c != 0xae && c != 0xd2) return 1; - break; - - case ISO_8859_8: - if (c >= 0xa2 && c <= 0xfa) { - if (c >= 0xbf && c <= 0xde) return 0; - return 1; - } - break; - - case ISO_8859_11: - if (c >= 0xa1 && c <= 0xda) return 1; - if (c >= 0xdf && c <= 0xfb) return 1; - break; - - case KOI8: - if (c >= 0xc0 && c <= 0xff) return 1; - break; - - case KOI8_R: - if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsLower(int enc, int c) -{ - if (c >= 0x61 && c <= 0x7a) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_9: - if (c == 0xaa) return 1; - if (c == 0xb5) return 1; - if (c == 0xba) return 1; - if (c >= 0xdf && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - break; - - case ISO_8859_2: - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c >= 0xdf && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_3: - if (c == 0xb1) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c == 0xbf) return 1; - if (c == 0xdf) return 1; - if (c >= 0xe0 && c <= 0xe2) return 1; - if (c >= 0xe4 && c <= 0xef) return 1; - if (c >= 0xf1 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_4: - if (c == 0xa2) return 1; - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c == 0xdf) return 1; - if (c >= 0xe0 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_5: - if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; - break; - - case ISO_8859_6: - break; - - case ISO_8859_7: - if (c == 0xc0) return 1; - if (c >= 0xdc && c <= 0xfe) return 1; - break; - - case ISO_8859_8: - if (c == 0xb5) return 1; - break; - - case ISO_8859_10: - if (c >= 0xb1 && c <= 0xb6) return 1; - if (c >= 0xb8 && c <= 0xbc) return 1; - if (c == 0xbe || c == 0xbf) return 1; - if (c >= 0xdf && c <= 0xff) return 1; - break; - - case ISO_8859_11: - break; - - case ISO_8859_13: - if (c == 0xb5) return 1; - if (c == 0xb8) return 1; - if (c == 0xba) return 1; - if (c == 0xbf) return 1; - if (c >= 0xdf && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_14: - if (c == 0xa2) return 1; - if (c == 0xa5) return 1; - if (c == 0xab) return 1; - if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbc) return 1; - if (c == 0xbe || c == 0xbf) return 1; - if (c >= 0xdf && c <= 0xff) return 1; - break; - - case ISO_8859_15: - if (c == 0xaa) return 1; - if (c == 0xb5) return 1; - if (c == 0xba) return 1; - if (c >= 0xdf && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - if (c == 0xa8) return 1; - if (c == 0xb8) return 1; - if (c == 0xbd) return 1; - break; - - case ISO_8859_16: - if (c == 0xa2) return 1; - if (c == 0xa8) return 1; - if (c == 0xae) return 1; - if (c == 0xb3) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbd) return 1; - if (c == 0xbf) return 1; - if (c >= 0xdf && c <= 0xff) return 1; - break; - - case KOI8_R: - if (c == 0xa3) return 1; - /* fall */ - case KOI8: - if (c >= 0xc0 && c <= 0xdf) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsPrint(int enc, int c) -{ - if (c >= 0x20 && c <= 0x7e) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - if (c >= 0x09 && c <= 0x0d) return 1; - if (c == 0x85) return 1; - /* fall */ - case ISO_8859_1: - case ISO_8859_2: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_9: - case ISO_8859_10: - case ISO_8859_13: - case ISO_8859_14: - case ISO_8859_15: - case ISO_8859_16: - if (c >= 0xa0 && c <= 0xff) return 1; - break; - - case ISO_8859_3: - if (c >= 0xa0) { - if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || - c == 0xe3 || c == 0xf0) - return 0; - else - return 1; - } - break; - - case ISO_8859_6: - if (c == 0xa0) return 1; - if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) - return 1; - if (c >= 0xc1 && c <= 0xda) return 1; - if (c >= 0xe0 && c <= 0xf2) return 1; - break; - - case ISO_8859_7: - if (c >= 0xa0 && c <= 0xfe && - c != 0xa4 && c != 0xa5 && c != 0xaa && - c != 0xae && c != 0xd2) return 1; - break; - - case ISO_8859_8: - if (c >= 0xa0 && c <= 0xfa) { - if (c >= 0xbf && c <= 0xde) return 0; - if (c == 0xa1) return 0; - return 1; - } - break; - - case ISO_8859_11: - if (c >= 0xa0 && c <= 0xda) return 1; - if (c >= 0xdf && c <= 0xfb) return 1; - break; - - case KOI8: - if (c == 0xa0) return 1; - if (c >= 0xc0 && c <= 0xff) return 1; - break; - - case KOI8_R: - if (c >= 0x80 && c <= 0xff) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsPunct(int enc, int c) -{ - if (enc == UNICODE_ISO_8859_1) { - if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || - c == 0x7c || c == 0x7e) return 1; - if (c >= 0x3c && c <= 0x3e) return 1; - } - - if (c >= 0x21 && c <= 0x23) return 1; - if (c >= 0x25 && c <= 0x2a) return 1; - if (c >= 0x2c && c <= 0x2f) return 1; - if (c >= 0x3a && c <= 0x3b) return 1; - if (c >= 0x3f && c <= 0x40) return 1; - if (c >= 0x5b && c <= 0x5d) return 1; - if (c == 0x5f) return 1; - if (c == 0x7b) return 1; - if (c == 0x7d) return 1; - - switch (enc) { - case ISO_8859_1: - case ISO_8859_9: - case ISO_8859_15: - if (c == 0xad) return 1; - /* fall */ - case UNICODE_ISO_8859_1: - if (c == 0xa1) return 1; - if (c == 0xab) return 1; - if (c == 0xb7) return 1; - if (c == 0xbb) return 1; - if (c == 0xbf) return 1; - break; - - case ISO_8859_2: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_14: - if (c == 0xad) return 1; - break; - - case ISO_8859_3: - case ISO_8859_10: - if (c == 0xad) return 1; - if (c == 0xb7) return 1; - if (c == 0xbd) return 1; - break; - - case ISO_8859_6: - if (c == 0xac) return 1; - if (c == 0xad) return 1; - if (c == 0xbb) return 1; - if (c == 0xbf) return 1; - break; - - case ISO_8859_7: - if (c == 0xa1 || c == 0xa2) return 1; - if (c == 0xab) return 1; - if (c == 0xaf) return 1; - if (c == 0xad) return 1; - if (c == 0xb7 || c == 0xbb) return 1; - break; - - case ISO_8859_8: - if (c == 0xab) return 1; - if (c == 0xad) return 1; - if (c == 0xb7) return 1; - if (c == 0xbb) return 1; - if (c == 0xdf) return 1; - break; - - case ISO_8859_13: - if (c == 0xa1 || c == 0xa5) return 1; - if (c == 0xab || c == 0xad) return 1; - if (c == 0xb4 || c == 0xb7) return 1; - if (c == 0xbb) return 1; - if (c == 0xff) return 1; - break; - - case ISO_8859_16: - if (c == 0xa5) return 1; - if (c == 0xab) return 1; - if (c == 0xad) return 1; - if (c == 0xb5) return 1; - if (c == 0xb7) return 1; - if (c == 0xbb) return 1; - break; - - case KOI8_R: - if (c == 0x9e) return 1; - break; - - case ISO_8859_11: - case KOI8: - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsSpace(int enc, int c) -{ - if (c >= 0x09 && c <= 0x0d) return 1; - if (c == 0x20) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - if (c == 0x85) return 1; - /* fall */ - case ISO_8859_1: - case ISO_8859_2: - case ISO_8859_3: - case ISO_8859_4: - case ISO_8859_5: - case ISO_8859_6: - case ISO_8859_7: - case ISO_8859_8: - case ISO_8859_9: - case ISO_8859_10: - case ISO_8859_11: - case ISO_8859_13: - case ISO_8859_14: - case ISO_8859_15: - case ISO_8859_16: - case KOI8: - if (c == 0xa0) return 1; - break; - - case KOI8_R: - if (c == 0x9a) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsUpper(int enc, int c) -{ - if (c >= 0x41 && c <= 0x5a) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_9: - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - break; - - case ISO_8859_2: - if (c == 0xa1 || c == 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c >= 0xae && c <= 0xaf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - break; - - case ISO_8859_3: - if (c == 0xa1) return 1; - if (c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xaf) return 1; - if (c >= 0xc0 && c <= 0xc2) return 1; - if (c >= 0xc4 && c <= 0xcf) return 1; - if (c >= 0xd1 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - break; - - case ISO_8859_4: - if (c == 0xa1 || c == 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xae) return 1; - if (c == 0xbd) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - break; - - case ISO_8859_5: - if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; - break; - - case ISO_8859_6: - break; - - case ISO_8859_7: - if (c == 0xb6) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; - break; - - case ISO_8859_8: - case ISO_8859_11: - break; - - case ISO_8859_10: - if (c >= 0xa1 && c <= 0xa6) return 1; - if (c >= 0xa8 && c <= 0xac) return 1; - if (c == 0xae || c == 0xaf) return 1; - if (c >= 0xc0 && c <= 0xde) return 1; - break; - - case ISO_8859_13: - if (c == 0xa8) return 1; - if (c == 0xaa) return 1; - if (c == 0xaf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - break; - - case ISO_8859_14: - if (c == 0xa1) return 1; - if (c == 0xa4 || c == 0xa6) return 1; - if (c == 0xa8) return 1; - if (c == 0xaa || c == 0xac) return 1; - if (c == 0xaf || c == 0xb0) return 1; - if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1; - if (c == 0xbb || c == 0xbd) return 1; - if (c >= 0xc0 && c <= 0xde) return 1; - break; - - case ISO_8859_15: - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xde) return 1; - if (c == 0xa6) return 1; - if (c == 0xb4) return 1; - if (c == 0xbc) return 1; - if (c == 0xbe) return 1; - break; - - case ISO_8859_16: - if (c == 0xa1) return 1; - if (c == 0xa3) return 1; - if (c == 0xa6) return 1; - if (c == 0xaa) return 1; - if (c == 0xac) return 1; - if (c == 0xaf) return 1; - if (c == 0xb2) return 1; - if (c == 0xb4) return 1; - if (c == 0xbc) return 1; - if (c == 0xbe) return 1; - if (c >= 0xc0 && c <= 0xde) return 1; - break; - - case KOI8_R: - if (c == 0xb3) return 1; - /* fall */ - case KOI8: - if (c >= 0xe0 && c <= 0xff) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsXDigit(int enc, int c) -{ - if (c >= 0x30 && c <= 0x39) return 1; - if (c >= 0x41 && c <= 0x46) return 1; - if (c >= 0x61 && c <= 0x66) return 1; - return 0; -} - -static int IsWord(int enc, int c) -{ - if (c >= 0x30 && c <= 0x39) return 1; - if (c >= 0x41 && c <= 0x5a) return 1; - if (c == 0x5f) return 1; - if (c >= 0x61 && c <= 0x7a) return 1; - - switch (enc) { - case UNICODE_ISO_8859_1: - case ISO_8859_1: - case ISO_8859_9: - if (c == 0xaa) return 1; - if (c >= 0xb2 && c <= 0xb3) return 1; - if (c == 0xb5) return 1; - if (c >= 0xb9 && c <= 0xba) return 1; - if (c >= 0xbc && c <= 0xbe) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - break; - - case ISO_8859_2: - if (c == 0xa1 || c == 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c >= 0xae && c <= 0xaf) return 1; - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbc) return 1; - if (c >= 0xbe && c <= 0xbf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_3: - if (c == 0xa1) return 1; - if (c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xaf) return 1; - if (c >= 0xb1 && c <= 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbd) return 1; - if (c == 0xbf) return 1; - if (c >= 0xc0 && c <= 0xc2) return 1; - if (c >= 0xc4 && c <= 0xcf) return 1; - if (c >= 0xd1 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xe2) return 1; - if (c >= 0xe4 && c <= 0xef) return 1; - if (c >= 0xf1 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_4: - if (c >= 0xa1 && c <= 0xa3) return 1; - if (c == 0xa5 || c == 0xa6) return 1; - if (c >= 0xa9 && c <= 0xac) return 1; - if (c == 0xae) return 1; - if (c == 0xb1 || c == 0xb3) return 1; - if (c == 0xb5 || c == 0xb6) return 1; - if (c >= 0xb9 && c <= 0xbf) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_5: - if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; - if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; - break; - - case ISO_8859_6: - if (c >= 0xc1 && c <= 0xda) return 1; - if (c >= 0xe0 && c <= 0xea) return 1; - if (c >= 0xeb && c <= 0xf2) return 1; - break; - - case ISO_8859_7: - if (c == 0xb2 || c == 0xb3) return 1; - if (c == 0xb6) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c >= 0xbc && c <= 0xbf) return 1; - if (c == 0xc0) return 1; - if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; - if (c >= 0xdc && c <= 0xfe) return 1; - break; - - case ISO_8859_8: - if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; - if (c >= 0xbc && c <= 0xbe) return 1; - if (c >= 0xe0 && c <= 0xfa) return 1; - break; - - case ISO_8859_10: - if (c >= 0xa1 && c <= 0xff) { - if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd) - return 1; - } - break; - - case ISO_8859_11: - if (c >= 0xa1 && c <= 0xda) return 1; - if (c >= 0xdf && c <= 0xfb) return 1; - break; - - case ISO_8859_13: - if (c == 0xa8) return 1; - if (c == 0xaa) return 1; - if (c == 0xaf) return 1; - if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; - if (c >= 0xbc && c <= 0xbe) return 1; - if (c == 0xb8) return 1; - if (c == 0xba) return 1; - if (c >= 0xbf && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xfe) return 1; - break; - - case ISO_8859_14: - if (c >= 0xa1 && c <= 0xff) { - if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae || - c == 0xb6) return 0; - return 1; - } - break; - - case ISO_8859_15: - if (c == 0xaa) return 1; - if (c >= 0xb2 && c <= 0xb3) return 1; - if (c == 0xb5) return 1; - if (c >= 0xb9 && c <= 0xba) return 1; - if (c >= 0xbc && c <= 0xbe) return 1; - if (c >= 0xc0 && c <= 0xd6) return 1; - if (c >= 0xd8 && c <= 0xf6) return 1; - if (c >= 0xf8 && c <= 0xff) return 1; - if (c == 0xa6) return 1; - if (c == 0xa8) return 1; - if (c == 0xb4) return 1; - if (c == 0xb8) return 1; - break; - - case ISO_8859_16: - if (c == 0xa1) return 1; - if (c == 0xa2) return 1; - if (c == 0xa3) return 1; - if (c == 0xa6) return 1; - if (c == 0xa8) return 1; - if (c == 0xaa) return 1; - if (c == 0xac) return 1; - if (c == 0xae) return 1; - if (c == 0xaf) return 1; - if (c == 0xb2) return 1; - if (c == 0xb3) return 1; - if (c == 0xb4) return 1; - if (c >= 0xb8 && c <= 0xba) return 1; - if (c == 0xbc) return 1; - if (c == 0xbd) return 1; - if (c == 0xbe) return 1; - if (c == 0xbf) return 1; - if (c >= 0xc0 && c <= 0xde) return 1; - if (c >= 0xdf && c <= 0xff) return 1; - break; - - case KOI8_R: - if (c == 0x9d) return 1; - if (c == 0xa3 || c == 0xb3) return 1; - /* fall */ - case KOI8: - if (c >= 0xc0 && c <= 0xff) return 1; - break; - - default: - exit(-1); - } - - return 0; -} - -static int IsAscii(int enc, int c) -{ - if (c >= 0x00 && c <= 0x7f) return 1; - return 0; -} - -static int IsNewline(int enc, int c) -{ - if (c == 0x0a) return 1; - return 0; -} - -static int exec(FILE* fp, ENC_INFO* einfo) -{ -#define NCOL 8 - - int c, val, enc; - - enc = einfo->num; - - fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n", - einfo->name); - - for (c = 0; c < 256; c++) { - val = 0; - if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE; - if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA; - if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK; - if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL; - if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT; - if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH; - if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER; - if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT; - if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT; - if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE; - if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER; - if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT; - if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD; - if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII; - - if (c % NCOL == 0) fputs(" ", fp); - fprintf(fp, "0x%04x", val); - if (c != 255) fputs(",", fp); - if (c != 0 && c % NCOL == (NCOL-1)) - fputs("\n", fp); - else - fputs(" ", fp); - } - fprintf(fp, "};\n"); - return 0; -} - -extern int main(int argc, char* argv[]) -{ - int i; - FILE* fp = stdout; - - for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) { - exec(fp, &Info[i]); - } -} diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c deleted file mode 100644 index f7d7d52265..0000000000 --- a/ext/mbstring/oniguruma/enc/sjis.c +++ /dev/null @@ -1,238 +0,0 @@ -/********************************************************************** - sjis.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static const int EncLen_SJIS[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 -}; - -static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 -}; - -#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) -#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] - -static int -sjis_mbc_enc_len(const UChar* p) -{ - return EncLen_SJIS[*p]; -} - -static int -sjis_code_to_mbclen(OnigCodePoint code) -{ - if (code < 256) { - if (EncLen_SJIS[(int )code] == 1) - return 1; - else - return 0; - } - else if (code <= 0xffff) { - return 2; - } - else - return 0; -} - -static OnigCodePoint -sjis_mbc_to_code(const UChar* p, const UChar* end) -{ - int c, i, len; - OnigCodePoint n; - - len = enc_len(ONIG_ENCODING_SJIS, p); - c = *p++; - n = c; - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - return n; -} - -static int -sjis_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); - *p++ = (UChar )(code & 0xff); - -#if 0 - if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf)) - return REGERR_INVALID_WIDE_CHAR_VALUE; -#endif - return p - buf; -} - -static int -sjis_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp)++; - return 1; - } - else { - int len = enc_len(ONIG_ENCODING_SJIS, p); - - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); - -} - -static int -sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if ((ctype & (ONIGENC_CTYPE_WORD | - ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { - return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -sjis_left_adjust_char_head(const UChar* start, const UChar* s) -{ - const UChar *p; - int len; - - if (s <= start) return (UChar* )s; - p = s; - - if (SJIS_ISMB_TRAIL(*p)) { - while (p > start) { - if (! SJIS_ISMB_FIRST(*--p)) { - p++; - break; - } - } - } - len = enc_len(ONIG_ENCODING_SJIS, p); - if (p + len > s) return (UChar* )p; - p += len; - return (UChar* )(p + ((s - p) & ~1)); -} - -static int -sjis_is_allowed_reverse_match(const UChar* s, const UChar* end) -{ - const UChar c = *s; - return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); -} - -OnigEncodingType OnigEncodingSJIS = { - sjis_mbc_enc_len, - "Shift_JIS", /* name */ - 2, /* max byte length */ - 1, /* min byte length */ - ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - onigenc_is_mbc_newline_0x0a, - sjis_mbc_to_code, - sjis_code_to_mbclen, - sjis_code_to_mbc, - sjis_mbc_to_normalize, - sjis_is_mbc_ambiguous, - onigenc_ascii_get_all_pair_ambig_codes, - onigenc_nothing_get_all_comp_ambig_codes, - sjis_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - sjis_left_adjust_char_head, - sjis_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c deleted file mode 100644 index a8cf539014..0000000000 --- a/ext/mbstring/oniguruma/enc/unicode.c +++ /dev/null @@ -1,3403 +0,0 @@ -/********************************************************************** - unicode.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - - -const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, - 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, - 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, - 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 -}; - -static const OnigCodePoint CRAlnum[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 414, -#else - 9, -#endif - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bef, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f29, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x1371, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRAlnum */ - -static const OnigCodePoint CRAlpha[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 396, -#else - 8, -#endif - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09f0, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRAlpha */ - -static const OnigCodePoint CRBlank[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 9, -#else - 3, -#endif - 0x0009, 0x0009, - 0x0020, 0x0020, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRBlank */ - -static const OnigCodePoint CRCntrl[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 19, -#else - 3, -#endif - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 0x00ad -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRCntrl */ - -static const OnigCodePoint CRDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 23, -#else - 1, -#endif - 0x0030, 0x0039 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be7, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x1369, 0x1371, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRDigit */ - -static const OnigCodePoint CRGraph[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 405, -#else - 2, -#endif - 0x0021, 0x007e, - 0x00a1, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x2054, - 0x2057, 0x2057, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRGraph */ - -static const OnigCodePoint CRLower[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 424, -#else - 6, -#endif - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 0x022f, - 0x0231, 0x0231, - 0x0233, 0x0236, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fb, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d6b, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213d, 0x213d, - 0x2146, 0x2149, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a3, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRLower */ - -static const OnigCodePoint CRPrint[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 405, -#else - 4, -#endif - 0x0009, 0x000d, - 0x0020, 0x007e, - 0x0085, 0x0085, - 0x00a0, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2054, - 0x2057, 0x2057, - 0x205f, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRPrint */ - -static const OnigCodePoint CRPunct[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 86, -#else - 14, -#endif - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d, - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x2054, - 0x2057, 0x2057, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRPunct */ - -static const OnigCodePoint CRSpace[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 11, -#else - 4, -#endif - 0x0009, 0x000d, - 0x0020, 0x0020, - 0x0085, 0x0085, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRSpace */ - -static const OnigCodePoint CRUpper[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 421, -#else - 3, -#endif - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, - 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x0400, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRUpper */ - -static const OnigCodePoint CRXDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 3, -#else - 3, -#endif - 0x0030, 0x0039, - 0x0041, 0x0046, - 0x0061, 0x0066 -}; - -static const OnigCodePoint CRASCII[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 1, -#else - 1, -#endif - 0x0000, 0x007f -}; - -static const OnigCodePoint CRWord[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 436, -#else - 12, -#endif - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x005f, 0x005f, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b2, 0x00b3, - 0x00b5, 0x00b5, - 0x00b9, 0x00ba, - 0x00bc, 0x00be, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, -#ifndef USE_UNICODE_FULL_RANGE_CTYPE - 0x00f8, 0x7fffffff -#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ - 0x00f8, 0x0236, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x09f4, 0x09f9, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bf2, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f33, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x3005, 0x3007, - 0x3021, 0x302f, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff3f, 0xff3f, - 0xff41, 0xff5a, - 0xff65, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of CRWord */ - - -extern int -onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - return onig_is_in_code_range((UChar* )CRAlpha, code); - break; - case ONIGENC_CTYPE_BLANK: - return onig_is_in_code_range((UChar* )CRBlank, code); - break; - case ONIGENC_CTYPE_CNTRL: - return onig_is_in_code_range((UChar* )CRCntrl, code); - break; - case ONIGENC_CTYPE_DIGIT: - return onig_is_in_code_range((UChar* )CRDigit, code); - break; - case ONIGENC_CTYPE_GRAPH: - return onig_is_in_code_range((UChar* )CRGraph, code); - break; - case ONIGENC_CTYPE_LOWER: - return onig_is_in_code_range((UChar* )CRLower, code); - break; - case ONIGENC_CTYPE_PRINT: - return onig_is_in_code_range((UChar* )CRPrint, code); - break; - case ONIGENC_CTYPE_PUNCT: - return onig_is_in_code_range((UChar* )CRPunct, code); - break; - case ONIGENC_CTYPE_SPACE: - return onig_is_in_code_range((UChar* )CRSpace, code); - break; - case ONIGENC_CTYPE_UPPER: - return onig_is_in_code_range((UChar* )CRUpper, code); - break; - case ONIGENC_CTYPE_XDIGIT: - return FALSE; - break; - case ONIGENC_CTYPE_WORD: - return onig_is_in_code_range((UChar* )CRWord, code); - break; - case ONIGENC_CTYPE_ASCII: - return FALSE; - break; - case ONIGENC_CTYPE_ALNUM: - return onig_is_in_code_range((UChar* )CRAlnum, code); - break; - case ONIGENC_CTYPE_NEWLINE: - return FALSE; - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - -#else - - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - return TRUE; - } - return FALSE; -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -} - -extern int -onigenc_unicode_get_ctype_code_range(int ctype, - const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) -{ - static const OnigCodePoint EmptyRange[] = { 0 }; - -#define CR_SET(list) do { \ - *mbr = list; \ -} while (0) - - *sbr = EmptyRange; - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - CR_SET(CRAlpha); - break; - case ONIGENC_CTYPE_BLANK: - CR_SET(CRBlank); - break; - case ONIGENC_CTYPE_CNTRL: - CR_SET(CRCntrl); - break; - case ONIGENC_CTYPE_DIGIT: - CR_SET(CRDigit); - break; - case ONIGENC_CTYPE_GRAPH: - CR_SET(CRGraph); - break; - case ONIGENC_CTYPE_LOWER: - CR_SET(CRLower); - break; - case ONIGENC_CTYPE_PRINT: - CR_SET(CRPrint); - break; - case ONIGENC_CTYPE_PUNCT: - CR_SET(CRPunct); - break; - case ONIGENC_CTYPE_SPACE: - CR_SET(CRSpace); - break; - case ONIGENC_CTYPE_UPPER: - CR_SET(CRUpper); - break; - case ONIGENC_CTYPE_XDIGIT: - CR_SET(CRXDigit); - break; - case ONIGENC_CTYPE_WORD: - CR_SET(CRWord); - break; - case ONIGENC_CTYPE_ASCII: - CR_SET(CRASCII); - break; - case ONIGENC_CTYPE_ALNUM: - CR_SET(CRAlnum); - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return 0; -} diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c deleted file mode 100755 index 0dd2832f70..0000000000 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ /dev/null @@ -1,259 +0,0 @@ -/********************************************************************** - utf16_be.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) -#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) - -static const int EncLen_UTF16[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static int -utf16be_mbc_enc_len(const UChar* p) -{ - return EncLen_UTF16[*p]; -} - -static int -utf16be_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p + 1 < end) { - if (*(p+1) == 0x0a && *p == 0x00) - return 1; -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00) - return 1; - if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) - return 1; -#endif - } - return 0; -} - -static OnigCodePoint -utf16be_mbc_to_code(const UChar* p, const UChar* end) -{ - OnigCodePoint code; - - if (UTF16_IS_SURROGATE_FIRST(*p)) { - code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16) - + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8) - + p[3]; - } - else { - code = p[0] * 256 + p[1]; - } - return code; -} - -static int -utf16be_code_to_mbclen(OnigCodePoint code) -{ - return (code > 0xffff ? 4 : 2); -} - -static int -utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar* p = buf; - - if (code > 0xffff) { - unsigned int plane, high; - - plane = code >> 16; - *p++ = (plane >> 2) + 0xd8; - high = (code & 0xff00) >> 8; - *p++ = ((plane & 0x03) << 6) + (high >> 2); - *p++ = (high & 0x02) + 0xdc; - *p = (UChar )(code & 0xff); - return 4; - } - else { - *p++ = (UChar )((code & 0xff00) >> 8); - *p++ = (UChar )(code & 0xff); - return 2; - } -} - -static int -utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - const UChar* p = *pp; - - if (*p == 0) { - p++; - if (end > p + 2 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 4; - return 2; - } - - *lower++ = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp) += 2; - return 2; /* return byte length of converted char to lower */ - } - else { - int len; - len = EncLen_UTF16[*p]; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*p]; - - if (*p == 0) { - int c, v; - - p++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 2 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - (*pp) += 2; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -utf16be_left_adjust_char_head(const UChar* start, const UChar* s) -{ - if (s <= start) return (UChar* )s; - - if ((s - start) % 2 == 1) { - s--; - } - - if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) - s -= 2; - - return (UChar* )s; -} - -OnigEncodingType OnigEncodingUTF16_BE = { - utf16be_mbc_enc_len, - "UTF-16BE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf16be_is_mbc_newline, - utf16be_mbc_to_code, - utf16be_code_to_mbclen, - utf16be_code_to_mbc, - utf16be_mbc_to_normalize, - utf16be_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, - utf16be_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c deleted file mode 100755 index 93cc6138a6..0000000000 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ /dev/null @@ -1,254 +0,0 @@ -/********************************************************************** - utf16_le.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) -#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) - -static const int EncLen_UTF16[] = { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static int -utf16le_code_to_mbclen(OnigCodePoint code) -{ - return (code > 0xffff ? 4 : 2); -} - -static int -utf16le_mbc_enc_len(const UChar* p) -{ - return EncLen_UTF16[*(p+1)]; -} - -static int -utf16le_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p + 1 < end) { - if (*p == 0x0a && *(p+1) == 0x00) - return 1; -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00) - return 1; - if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) - return 1; -#endif - } - return 0; -} - -static OnigCodePoint -utf16le_mbc_to_code(const UChar* p, const UChar* end) -{ - OnigCodePoint code; - UChar c0 = *p; - UChar c1 = *(p+1); - - if (UTF16_IS_SURROGATE_FIRST(c1)) { - code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) - + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) - + p[2]; - } - else { - code = c1 * 256 + p[0]; - } - return code; -} - -static int -utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar* p = buf; - - if (code > 0xffff) { - unsigned int plane, high; - - plane = code >> 16; - high = (code & 0xff00) >> 8; - - *p++ = ((plane & 0x03) << 6) + (high >> 2); - *p++ = (plane >> 2) + 0xd8; - *p++ = (UChar )(code & 0xff); - *p = (high & 0x02) + 0xdc; - return 4; - } - else { - *p++ = (UChar )(code & 0xff); - *p++ = (UChar )((code & 0xff00) >> 8); - return 2; - } -} - -static int -utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - const UChar* p = *pp; - - if (*(p+1) == 0) { - if (end > p + 3 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - *lower++ = 0xdf; - *lower = '\0'; - (*pp) += 4; - return 2; - } - - *(lower+1) = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp) += 2; - return 2; /* return byte length of converted char to lower */ - } - else { - int len = EncLen_UTF16[*(p+1)]; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*(p+1)]; - - if (*(p+1) == 0) { - int c, v; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 3 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - (*pp) += 2; - return TRUE; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -utf16le_left_adjust_char_head(const UChar* start, const UChar* s) -{ - if (s <= start) return (UChar* )s; - - if ((s - start) % 2 == 1) { - s--; - } - - if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) - s -= 2; - - return (UChar* )s; -} - -OnigEncodingType OnigEncodingUTF16_LE = { - utf16le_mbc_enc_len, - "UTF-16LE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf16le_is_mbc_newline, - utf16le_mbc_to_code, - utf16le_code_to_mbclen, - utf16le_code_to_mbc, - utf16le_mbc_to_normalize, - utf16le_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, - utf16le_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c deleted file mode 100755 index 36b477286c..0000000000 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ /dev/null @@ -1,216 +0,0 @@ -/********************************************************************** - utf32_be.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static int -utf32be_mbc_enc_len(const UChar* p) -{ - return 4; -} - -static int -utf32be_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p + 3 < end) { - if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) - return 1; -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*(p+3) == 0x0d || *(p+3) == 0x85) - && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) - return 1; - if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) - && *(p+1) == 0 && *p == 0) - return 1; -#endif - } - return 0; -} - -static OnigCodePoint -utf32be_mbc_to_code(const UChar* p, const UChar* end) -{ - return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); -} - -static int -utf32be_code_to_mbclen(OnigCodePoint code) -{ - return 4; -} - -static int -utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar* p = buf; - - *p++ = (UChar )((code & 0xff000000) >>24); - *p++ = (UChar )((code & 0xff0000) >>16); - *p++ = (UChar )((code & 0xff00) >> 8); - *p++ = (UChar ) (code & 0xff); - return 4; -} - -static int -utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - const UChar* p = *pp; - - if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { - p += 3; - if (end > p + 4 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - *lower++ = '\0'; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 8; - return 4; - } - - *lower++ = '\0'; - *lower++ = '\0'; - *lower++ = '\0'; - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - - (*pp) += 4; - return 4; /* return byte length of converted char to lower */ - } - else { - int len = 4; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += 4; - - if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { - int c, v; - - p += 3; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 4 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -utf32be_left_adjust_char_head(const UChar* start, const UChar* s) -{ - int rem; - - if (s <= start) return (UChar* )s; - - rem = (s - start) % 4; - return (UChar* )(s - rem); -} - -OnigEncodingType OnigEncodingUTF32_BE = { - utf32be_mbc_enc_len, - "UTF-32BE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf32be_is_mbc_newline, - utf32be_mbc_to_code, - utf32be_code_to_mbclen, - utf32be_code_to_mbc, - utf32be_mbc_to_normalize, - utf32be_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, - utf32be_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c deleted file mode 100755 index 1e9487d1d9..0000000000 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************************** - utf32_le.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static int -utf32le_mbc_enc_len(const UChar* p) -{ - return 4; -} - -static int -utf32le_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p + 3 < end) { - if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) - return 1; -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 - && (p+2) == 0x00 && *(p+3) == 0x00) - return 1; - if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) - && *(p+2) == 0x00 && *(p+3) == 0x00) - return 1; -#endif - } - return 0; -} - -static OnigCodePoint -utf32le_mbc_to_code(const UChar* p, const UChar* end) -{ - return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); -} - -static int -utf32le_code_to_mbclen(OnigCodePoint code) -{ - return 4; -} - -static int -utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - UChar* p = buf; - - *p++ = (UChar ) (code & 0xff); - *p++ = (UChar )((code & 0xff00) >> 8); - *p++ = (UChar )((code & 0xff0000) >>16); - *p++ = (UChar )((code & 0xff000000) >>24); - return 4; -} - -static int -utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, - UChar* lower) -{ - const UChar* p = *pp; - - if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { - if (end > p + 7 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - *lower++ = 0xdf; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = '\0'; - (*pp) += 8; - return 4; - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - *lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); - } - else { - *lower++ = *p; - } - *lower++ = '\0'; - *lower++ = '\0'; - *lower = '\0'; - - (*pp) += 4; - return 4; /* return byte length of converted char to lower */ - } - else { - int len = 4; - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += 4; - - if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { - int c, v; - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 7 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - ONIGENC_IS_MBC_ASCII(p)) || - ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && - !ONIGENC_IS_MBC_ASCII(p))) { - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, - (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)); - if ((v | ONIGENC_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - } - - return FALSE; -} - -static UChar* -utf32le_left_adjust_char_head(const UChar* start, const UChar* s) -{ - int rem; - - if (s <= start) return (UChar* )s; - - rem = (s - start) % 4; - return (UChar* )(s - rem); -} - -OnigEncodingType OnigEncodingUTF32_LE = { - utf32le_mbc_enc_len, - "UTF-32LE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf32le_is_mbc_newline, - utf32le_mbc_to_code, - utf32le_code_to_mbclen, - utf32le_code_to_mbc, - utf32le_mbc_to_normalize, - utf32le_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - onigenc_unicode_is_code_ctype, - onigenc_unicode_get_ctype_code_range, - utf32le_left_adjust_char_head, - onigenc_always_false_is_allowed_reverse_match -}; diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c deleted file mode 100644 index 0e816176ba..0000000000 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ /dev/null @@ -1,3764 +0,0 @@ -/********************************************************************** - utf8.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define USE_INVALID_CODE_SCHEME - -#ifdef USE_INVALID_CODE_SCHEME -/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ -#define INVALID_CODE_FE 0xfffffffe -#define INVALID_CODE_FF 0xffffffff -#define VALID_CODE_LIMIT 0x7fffffff -#endif - -#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) - -static const int EncLen_UTF8[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 -}; - -static int -utf8_mbc_enc_len(const UChar* p) -{ - return EncLen_UTF8[*p]; -} - -static int -utf8_is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p < end) { - if (*p == 0x0a) return 1; - -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS - if (*p == 0x0d) return 1; - if (p + 1 < end) { - if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ - return 1; - if (p + 2 < end) { - if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) - && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ - return 1; - } - } -#endif - } - - return 0; -} - -static OnigCodePoint -utf8_mbc_to_code(const UChar* p, const UChar* end) -{ - int c, len; - OnigCodePoint n; - - len = enc_len(ONIG_ENCODING_UTF8, p); - c = *p++; - if (len > 1) { - len--; - n = c & ((1 << (6 - len)) - 1); - while (len--) { - c = *p++; - n = (n << 6) | (c & ((1 << 6) - 1)); - } - return n; - } - else { -#ifdef USE_INVALID_CODE_SCHEME - if (c > 0xfd) { - return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF); - } -#endif - return (OnigCodePoint )c; - } -} - -static int -utf8_code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xffffff80) == 0) return 1; - else if ((code & 0xfffff800) == 0) { - if (code <= 0xff && code >= 0xfe) - return 1; - return 2; - } - else if ((code & 0xffff0000) == 0) return 3; - else if ((code & 0xffe00000) == 0) return 4; - else if ((code & 0xfc000000) == 0) return 5; - else if ((code & 0x80000000) == 0) return 6; -#ifdef USE_INVALID_CODE_SCHEME - else if (code == INVALID_CODE_FE) return 1; - else if (code == INVALID_CODE_FF) return 1; -#endif - else - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; -} - -#if 0 -static int -utf8_code_to_mbc_first(OnigCodePoint code) -{ - if ((code & 0xffffff80) == 0) - return code; - else { - if ((code & 0xfffff800) == 0) - return ((code>>6)& 0x1f) | 0xc0; - else if ((code & 0xffff0000) == 0) - return ((code>>12) & 0x0f) | 0xe0; - else if ((code & 0xffe00000) == 0) - return ((code>>18) & 0x07) | 0xf0; - else if ((code & 0xfc000000) == 0) - return ((code>>24) & 0x03) | 0xf8; - else if ((code & 0x80000000) == 0) - return ((code>>30) & 0x01) | 0xfc; - else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; - } - } -} -#endif - -static int -utf8_code_to_mbc(OnigCodePoint code, UChar *buf) -{ -#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) -#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) - - if ((code & 0xffffff80) == 0) { - *buf = (UChar )code; - return 1; - } - else { - UChar *p = buf; - - if ((code & 0xfffff800) == 0) { - *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); - } - else if ((code & 0xffff0000) == 0) { - *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0xffe00000) == 0) { - *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0xfc000000) == 0) { - *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); - *p++ = UTF8_TRAILS(code, 18); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0x80000000) == 0) { - *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); - *p++ = UTF8_TRAILS(code, 24); - *p++ = UTF8_TRAILS(code, 18); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } -#ifdef USE_INVALID_CODE_SCHEME - else if (code == INVALID_CODE_FE) { - *p = 0xfe; - return 1; - } - else if (code == INVALID_CODE_FF) { - *p = 0xff; - return 1; - } -#endif - else { - return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; - } - - *p++ = UTF8_TRAIL0(code); - return p - buf; - } -} - -static int -utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } - - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - } - else { - *lower = *p; - } - (*pp)++; - return 1; /* return byte length of converted char to lower */ - } - else { - int len; - - if (*p == 195) { /* 195 == '\303' */ - int c = *(p + 1); - if (c >= 128) { - if (c <= (UChar )'\236' && /* upper */ - (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c != (UChar )'\227') { - *lower++ = *p; - *lower = (UChar )(c + 32); - (*pp) += 2; - return 2; - } - } -#if 0 - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } -#endif - } - } - - len = enc_len(ONIG_ENCODING_UTF8, p); - if (lower != p) { - int i; - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - } - (*pp) += len; - return len; /* return byte length of converted char to lower */ - } -} - -static int -utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - (*pp) += 2; - return TRUE; - } - - (*pp)++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - } - else { - (*pp) += enc_len(ONIG_ENCODING_UTF8, p); - - if (*p == 195) { /* 195 == '\303' */ - int c = *(p + 1); - if (c >= 128) { - if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { - if (c <= (UChar )'\236') { /* upper */ - if (c == (UChar )'\227') return FALSE; - return TRUE; - } - else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */ - if (c == (UChar )'\267') return FALSE; - return TRUE; - } - } - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - return TRUE; - } - } - } - } - - return FALSE; -} - - -static const OnigCodePoint EmptyRange[] = { 0 }; - -static const OnigCodePoint SBAlnum[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBAlnum[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 411, -#else - 6, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bef, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f29, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x1371, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBAlnum */ - -static const OnigCodePoint SBAlpha[] = { - 2, - 0x0041, 0x005a, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBAlpha[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 394, -#else - 6, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09f0, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBAlpha */ - -static const OnigCodePoint SBBlank[] = { - 2, - 0x0009, 0x0009, - 0x0020, 0x0020 -}; - -static const OnigCodePoint MBBlank[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 7, -#else - 1, -#endif - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBBlank */ - -static const OnigCodePoint SBCntrl[] = { - 2, - 0x0000, 0x001f, - 0x007f, 0x007f -}; - -static const OnigCodePoint MBCntrl[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 18, -#else - 2, -#endif - 0x0080, 0x009f, - 0x00ad, 0x00ad -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBCntrl */ - -static const OnigCodePoint SBDigit[] = { - 1, - 0x0030, 0x0039 -}; - -static const OnigCodePoint MBDigit[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 22, -#else - 0 -#endif -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be7, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x1369, 0x1371, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBDigit */ - -static const OnigCodePoint SBGraph[] = { - 1, - 0x0021, 0x007e -}; - -static const OnigCodePoint MBGraph[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 404, -#else - 1, -#endif - 0x00a1, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x2054, - 0x2057, 0x2057, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBGraph */ - -static const OnigCodePoint SBLower[] = { - 1, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBLower[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 423, -#else - 5, -#endif - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 0x022f, - 0x0231, 0x0231, - 0x0233, 0x0236, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fb, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d6b, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213d, 0x213d, - 0x2146, 0x2149, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a3, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBLower */ - -static const OnigCodePoint SBPrint[] = { - 2, - 0x0009, 0x000d, - 0x0020, 0x007e -}; - -static const OnigCodePoint MBPrint[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 403, -#else - 2, -#endif - 0x0085, 0x0085, - 0x00a0, 0x0236 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0250, 0x0357, - 0x035d, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03fb, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060c, 0x0615, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x10fb, 0x10fb, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1361, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x19e0, 0x19ff, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2054, - 0x2057, 0x2057, - 0x205f, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x20a0, 0x20b1, - 0x20d0, 0x20ea, - 0x2100, 0x213b, - 0x213d, 0x214b, - 0x2153, 0x2183, - 0x2190, 0x23d0, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x2617, - 0x2619, 0x267d, - 0x2680, 0x2691, - 0x26a0, 0x26a1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27d0, 0x27eb, - 0x27f0, 0x2b0d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x327d, - 0x327f, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fa5, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x1039f, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBPrint */ - -static const OnigCodePoint SBPunct[] = { - 9, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d -}; /* end of SBPunct */ - -static const OnigCodePoint MBPunct[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 77, -#else - 5, -#endif - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x2054, - 0x2057, 0x2057, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBPunct */ - -static const OnigCodePoint SBSpace[] = { - 2, - 0x0009, 0x000d, - 0x0020, 0x0020 -}; - -static const OnigCodePoint MBSpace[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 9, -#else - 2, -#endif - 0x0085, 0x0085, - 0x00a0, 0x00a0 -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBSpace */ - -static const OnigCodePoint SBUpper[] = { - 1, - 0x0041, 0x005a -}; - -static const OnigCodePoint MBUpper[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 420, -#else - 2, -#endif - 0x00c0, 0x00d6, - 0x00d8, 0x00de -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - , - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, - 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x0400, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBUpper */ - -static const OnigCodePoint SBXDigit[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x0046, - 0x0061, 0x0066 -}; - -static const OnigCodePoint SBASCII[] = { - 1, - 0x0000, 0x007f -}; - -static const OnigCodePoint SBWord[] = { - 4, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x005f, 0x005f, - 0x0061, 0x007a -}; - -static const OnigCodePoint MBWord[] = { -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - 432, -#else - 8, -#endif - 0x00aa, 0x00aa, - 0x00b2, 0x00b3, - 0x00b5, 0x00b5, - 0x00b9, 0x00ba, - 0x00bc, 0x00be, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, -#ifndef USE_UNICODE_FULL_RANGE_CTYPE - 0x00f8, 0x7fffffff -#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ - 0x00f8, 0x0236, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x0357, - 0x035d, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x03fb, - 0x0400, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f5, - 0x04f8, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05a1, - 0x05a3, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c4, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x0658, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x074f, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x09f4, 0x09f9, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb5, - 0x0bb7, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be7, 0x0bf2, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f33, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10f8, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1206, - 0x1208, 0x1246, - 0x1248, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1286, - 0x1288, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12ae, - 0x12b0, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12ce, - 0x12d0, 0x12d6, - 0x12d8, 0x12ee, - 0x12f0, 0x130e, - 0x1310, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x131e, - 0x1320, 0x1346, - 0x1348, 0x135a, - 0x1369, 0x137c, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1d00, 0x1d6b, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x20d0, 0x20ea, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213d, 0x213f, - 0x2145, 0x2149, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x3005, 0x3007, - 0x3021, 0x302f, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fa5, - 0xa000, 0xa48c, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff3f, 0xff3f, - 0xff41, 0xff5a, - 0xff65, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a3, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ -}; /* end of MBWord */ - - -static int -utf8_get_ctype_code_range(int ctype, - const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) -{ -#define CR_SET(sbl,mbl) do { \ - *sbr = sbl; \ - *mbr = mbl; \ -} while (0) - -#define CR_SB_SET(sbl) do { \ - *sbr = sbl; \ - *mbr = EmptyRange; \ -} while (0) - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - CR_SET(SBAlpha, MBAlpha); - break; - case ONIGENC_CTYPE_BLANK: - CR_SET(SBBlank, MBBlank); - break; - case ONIGENC_CTYPE_CNTRL: - CR_SET(SBCntrl, MBCntrl); - break; - case ONIGENC_CTYPE_DIGIT: - CR_SET(SBDigit, MBDigit); - break; - case ONIGENC_CTYPE_GRAPH: - CR_SET(SBGraph, MBGraph); - break; - case ONIGENC_CTYPE_LOWER: - CR_SET(SBLower, MBLower); - break; - case ONIGENC_CTYPE_PRINT: - CR_SET(SBPrint, MBPrint); - break; - case ONIGENC_CTYPE_PUNCT: - CR_SET(SBPunct, MBPunct); - break; - case ONIGENC_CTYPE_SPACE: - CR_SET(SBSpace, MBSpace); - break; - case ONIGENC_CTYPE_UPPER: - CR_SET(SBUpper, MBUpper); - break; - case ONIGENC_CTYPE_XDIGIT: - CR_SB_SET(SBXDigit); - break; - case ONIGENC_CTYPE_WORD: - CR_SET(SBWord, MBWord); - break; - case ONIGENC_CTYPE_ASCII: - CR_SB_SET(SBASCII); - break; - case ONIGENC_CTYPE_ALNUM: - CR_SET(SBAlnum, MBAlnum); - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return 0; -} - -static int -utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - const OnigCodePoint *range; -#endif - - if (code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - -#ifdef USE_UNICODE_FULL_RANGE_CTYPE - - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - range = MBAlpha; - break; - case ONIGENC_CTYPE_BLANK: - range = MBBlank; - break; - case ONIGENC_CTYPE_CNTRL: - range = MBCntrl; - break; - case ONIGENC_CTYPE_DIGIT: - range = MBDigit; - break; - case ONIGENC_CTYPE_GRAPH: - range = MBGraph; - break; - case ONIGENC_CTYPE_LOWER: - range = MBLower; - break; - case ONIGENC_CTYPE_PRINT: - range = MBPrint; - break; - case ONIGENC_CTYPE_PUNCT: - range = MBPunct; - break; - case ONIGENC_CTYPE_SPACE: - range = MBSpace; - break; - case ONIGENC_CTYPE_UPPER: - range = MBUpper; - break; - case ONIGENC_CTYPE_XDIGIT: - return FALSE; - break; - case ONIGENC_CTYPE_WORD: - range = MBWord; - break; - case ONIGENC_CTYPE_ASCII: - return FALSE; - break; - case ONIGENC_CTYPE_ALNUM: - range = MBAlnum; - break; - case ONIGENC_CTYPE_NEWLINE: - return FALSE; - break; - - default: - return ONIGENCERR_TYPE_BUG; - break; - } - - return onig_is_in_code_range((UChar* )range, code); - -#else - - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { -#ifdef USE_INVALID_CODE_SCHEME - if (code <= VALID_CODE_LIMIT) -#endif - return TRUE; - } -#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ - - return FALSE; -} - -static UChar* -utf8_left_adjust_char_head(const UChar* start, const UChar* s) -{ - const UChar *p; - - if (s <= start) return (UChar* )s; - p = s; - - while (!utf8_islead(*p) && p > start) p--; - return (UChar* )p; -} - -OnigEncodingType OnigEncodingUTF8 = { - utf8_mbc_enc_len, - "UTF-8", /* name */ - 6, /* max byte length */ - 1, /* min byte length */ - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - }, - utf8_is_mbc_newline, - utf8_mbc_to_code, - utf8_code_to_mbclen, - utf8_code_to_mbc, - utf8_mbc_to_normalize, - utf8_is_mbc_ambiguous, - onigenc_iso_8859_1_get_all_pair_ambig_codes, - onigenc_ess_tsett_get_all_comp_ambig_codes, - utf8_is_code_ctype, - utf8_get_ctype_code_range, - utf8_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; |