summary refs log tree commit diff
path: root/ext/mbstring/oniguruma/enc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma/enc')
-rw-r--r-- ext/mbstring/oniguruma/enc/ascii.c 67
-rw-r--r-- ext/mbstring/oniguruma/enc/big5.c 168
-rw-r--r-- ext/mbstring/oniguruma/enc/euc_jp.c 228
-rw-r--r-- ext/mbstring/oniguruma/enc/euc_kr.c 173
-rw-r--r-- ext/mbstring/oniguruma/enc/euc_tw.c 144
-rw-r--r-- ext/mbstring/oniguruma/enc/gb18030.c 501
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_1.c 151
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_10.c 300
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_11.c 105
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_13.c 268
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_14.c 298
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_15.c 279
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_16.c 292
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_2.c 292
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_3.c 281
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_4.c 290
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_5.c 296
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_6.c 105
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_7.c 278
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_8.c 105
-rw-r--r-- ext/mbstring/oniguruma/enc/iso8859_9.c 270
-rw-r--r-- ext/mbstring/oniguruma/enc/koi8.c 264
-rw-r--r-- ext/mbstring/oniguruma/enc/koi8_r.c 266
-rw-r--r-- ext/mbstring/oniguruma/enc/mktable.c 1115
-rw-r--r-- ext/mbstring/oniguruma/enc/sjis.c 238
-rw-r--r-- ext/mbstring/oniguruma/enc/unicode.c 3403
-rw-r--r-- ext/mbstring/oniguruma/enc/utf16_be.c 232
-rw-r--r-- ext/mbstring/oniguruma/enc/utf16_le.c 230
-rw-r--r-- ext/mbstring/oniguruma/enc/utf32_be.c 187
-rw-r--r-- ext/mbstring/oniguruma/enc/utf32_le.c 185
-rw-r--r-- ext/mbstring/oniguruma/enc/utf8.c 3730
31 files changed, 14741 insertions, 0 deletions
diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c
new file mode 100644
index 0000000..64be21d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/ascii.c
@@ -0,0 +1,67 @@
+/**********************************************************************
+ ascii.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static int /* ctype test for US-ASCII: the ASCII macro's result for codes below 128, FALSE for everything else */
+ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (code < 128) /* only codes 0..127 are classified */
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else /* codes >= 128 are in no character class for this encoding */
+ return FALSE;
+}
+
+OnigEncodingType OnigEncodingASCII = { /* descriptor for US-ASCII; slot order is dictated by OnigEncodingType, declared outside this file */
+ onigenc_single_byte_mbc_enc_len,
+ "US-ASCII", /* name */
+ 1, /* max byte length */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ ascii_is_code_ctype, /* the only callback defined in ascii.c; every other slot is a shared onigenc_* helper */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
new file mode 100644
index 0000000..8679266
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -0,0 +1,168 @@
+/**********************************************************************
+ big5.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_BIG5[] = { /* 256 entries, one per byte value; big5_mbc_enc_len returns the entry for a character's first byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0 -> 1; 0xa1..0xaf -> 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xf0..0xfe -> 2; 0xff -> 1 */
+};
+
+static int /* length of the character whose first byte is *p, per EncLen_BIG5 */
+big5_mbc_enc_len(const UChar* p)
+{
+ return EncLen_BIG5[*p]; /* assumes UChar is an unsigned 8-bit type so *p stays within the 256 entries -- TODO confirm */
+}
+
+static OnigCodePoint /* forwards to the shared onigenc_mbn_mbc_to_code helper with the Big5 encoding bound */
+big5_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
+}
+
+static int /* forwards to the shared onigenc_mb2_code_to_mbc helper with the Big5 encoding bound */
+big5_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
+}
+
+static int /* forwards to the shared onigenc_mbn_mbc_to_normalize helper with the Big5 encoding bound */
+big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
+ pp, end, lower);
+}
+
+static int /* forwards to the shared onigenc_mbn_is_mbc_ambiguous helper with the Big5 encoding bound */
+big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
+}
+
+static int /* forwards to the shared onigenc_mb2_is_code_ctype helper with the Big5 encoding bound */
+big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
+}
+
+static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { /* nonzero marks byte values treated as a possible trail byte; read through BIG5_ISMB_TRAIL */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 0x70..0x7e -> 1; 0x7f -> 0 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 /* 0xf0..0xfe -> 1; 0xff -> 0 */
+};
+
+#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) /* true when EncLen_BIG5 gives `byte` a length above 1, i.e. it starts a 2-byte character */
+#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] /* nonzero when `byte` is marked as a possible trail byte */
+
+static UChar* /* returns a pointer <= s found by re-synchronising from the bytes left of s; returns s itself when s <= start */
+big5_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* nothing to the left to inspect */
+ p = s;
+
+ if (BIG5_ISMB_TRAIL(*p)) { /* *s may be a trail byte, so the preceding bytes decide where its character starts */
+ while (p > start) { /* walk left across the run of bytes that can start a 2-byte character */
+ if (! BIG5_ISMB_FIRST(*--p)) { /* stepped onto a byte that cannot start a 2-byte character */
+ p++; /* the run begins just after that byte */
+ break;
+ }
+ }
+ }
+ len = enc_len(ONIG_ENCODING_BIG5, p); /* enc_len is defined outside this file; presumably dispatches to big5_mbc_enc_len */
+ if (p + len > s) return (UChar* )p; /* the character at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by the remaining distance rounded down to an even number of bytes */
+}
+
+static int /* FALSE when *s is marked as a possible trail byte, TRUE otherwise; `end` is not read */
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ const UChar c = *s;
+
+ return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
+}
+
+OnigEncodingType OnigEncodingBIG5 = { /* descriptor for Big5; slot order is dictated by OnigEncodingType, declared outside this file */
+ big5_mbc_enc_len,
+ "Big5", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ big5_mbc_to_code,
+ onigenc_mb2_code_to_mbclen, /* shared 2-byte helper used directly, no Big5 wrapper */
+ big5_code_to_mbc,
+ big5_mbc_to_normalize,
+ big5_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ big5_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ big5_left_adjust_char_head,
+ big5_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c
new file mode 100644
index 0000000..71c81ee
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_jp.c
@@ -0,0 +1,228 @@
+/**********************************************************************
+ euc_jp.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) /* true unless c lies in 0xa1..0xfe; values below 0xa1 wrap to large numbers in the UChar cast (assumes UChar is unsigned 8-bit -- TODO confirm) */
+
+static const int EncLen_EUCJP[] = { /* 256 entries, one per byte value; eucjp_mbc_enc_len returns the entry for a character's first byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, /* 0x80: 0x8e -> 2, 0x8f -> 3 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0 -> 1; 0xa1..0xaf -> 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xf0..0xfe -> 2; 0xff -> 1 */
+};
+
+static int /* length of the character whose first byte is *p, per EncLen_EUCJP */
+eucjp_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCJP[*p]; /* assumes UChar is an unsigned 8-bit type so *p stays within the 256 entries -- TODO confirm */
+}
+
+static OnigCodePoint /* packs the bytes of the character at p into one code, first byte most significant */
+eucjp_mbc_to_code(const UChar* p, const UChar* end)
+{
+ int c, i, len;
+ OnigCodePoint n;
+
+ len = enc_len(ONIG_ENCODING_EUC_JP, p); /* expected byte count; enc_len is defined outside this file */
+ n = (OnigCodePoint )*p++; /* the first byte is read without checking it against end */
+ if (len == 1) return n; /* single-byte character: the code is the byte itself */
+
+ for (i = 1; i < len; i++) {
+ if (p >= end) break; /* input ends early: return what has been accumulated so far */
+ c = *p++;
+ n <<= 8; n += c; /* append the next byte as the low 8 bits */
+ }
+ return n;
+}
+
+static int /* byte length for `code`: 1, 3 or 2 per the tests below, or 0 when none of them matches */
+eucjp_code_to_mbclen(OnigCodePoint code)
+{
+ if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+ else if ((code & 0xff0000) != 0) return 3; /* bits 16..23 set: three bytes */
+ else if ((code & 0xff00) != 0) return 2; /* bits 8..15 set: two bytes */
+ else return 0; /* not ASCII per the macro and no bits set in 8..23 */
+}
+
+#if 0 /* compiled out: nothing down to the matching #endif is built */
+static int /* returns the byte in bits 16..23 if non-zero, else the byte in bits 8..15 if non-zero, else code itself */
+eucjp_code_to_mbc_first(OnigCodePoint code)
+{
+ int first;
+
+ if ((code & 0xff0000) != 0) {
+ first = (code >> 16) & 0xff;
+ }
+ else if ((code & 0xff00) != 0) {
+ first = (code >> 8) & 0xff;
+ }
+ else {
+ return (int )code;
+ }
+ return first;
+}
+#endif
+
+static int /* writes `code` to buf, high byte first; returns the byte count, or ONIGENCERR_INVALID_WIDE_CHAR_VALUE if the length check fails */
+eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar *p = buf;
+
+ if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); /* written only when bits 16..23 are non-zero */
+ if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); /* written only when bits 8..15 are non-zero */
+ *p++ = (UChar )(code & 0xff); /* the low byte is always written */
+
+#if 1 /* consistency check, currently enabled */
+ if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) /* the length implied by the first byte written must equal the bytes produced */
+ return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+ return p - buf;
+}
+
+static int /* copies one character from *pp to lower (ASCII optionally lower-cased), advances *pp past it and returns its byte length; `end` is not read */
+eucjp_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
+{
+ int len;
+ const UChar* p = *pp;
+
+ if (ONIGENC_IS_MBC_ASCII(p)) {
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { /* ASCII case folding requested */
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p; /* folding not requested: copy the byte unchanged */
+ }
+
+ (*pp)++;
+ return 1;
+ }
+ else {
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ if (lower != p) { /* skip the copy when the output buffer is the input itself */
+ int i;
+ for (i = 0; i < len; i++) { /* non-ASCII characters are copied byte for byte */
+ *lower++ = *p++;
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int /* forwards to the shared onigenc_mbn_is_mbc_ambiguous helper with the EUC-JP encoding bound */
+eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end);
+}
+
+static int /* ctype test: codes below 128 use the ASCII macro; other codes can match only WORD, GRAPH or PRINT */
+eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
+ return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE); /* TRUE only for codes that eucjp_code_to_mbclen sizes at 2 or 3 bytes */
+ }
+ }
+
+ return FALSE; /* code >= 128 tested against any other class */
+}
+
+static UChar* /* returns a pointer <= s found by re-synchronising from the bytes left of s; returns s itself when s <= start */
+eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* In this encoding
+ mb-trail bytes don't mix with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* nothing to the left to inspect */
+ p = s;
+
+ while (!eucjp_islead(*p) && p > start) p--; /* back up over bytes in 0xa1..0xfe until a byte outside that range, or start */
+ len = enc_len(ONIG_ENCODING_EUC_JP, p); /* enc_len is defined outside this file */
+ if (p + len > s) return (UChar* )p; /* the character at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by the remaining distance rounded down to an even number of bytes */
+}
+
+static int /* TRUE when *s is <= 0x7e, 0x8e or 0x8f; FALSE for every other byte; `end` is not read */
+eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ const UChar c = *s;
+ if (c <= 0x7e || c == 0x8e || c == 0x8f) /* 0x8e and 0x8f are the bytes EncLen_EUCJP maps to lengths 2 and 3 */
+ return TRUE;
+ else
+ return FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_JP = { /* descriptor for EUC-JP; slot order is dictated by OnigEncodingType, declared outside this file */
+ eucjp_mbc_enc_len,
+ "EUC-JP", /* name */
+ 3, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ eucjp_mbc_to_code,
+ eucjp_code_to_mbclen,
+ eucjp_code_to_mbc,
+ eucjp_mbc_to_normalize,
+ eucjp_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ eucjp_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ eucjp_left_adjust_char_head,
+ eucjp_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c
new file mode 100644
index 0000000..57bf801
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_kr.c
@@ -0,0 +1,173 @@
+/**********************************************************************
+ euc_kr.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_EUCKR[] = { /* 256 entries, one per byte value; euckr_mbc_enc_len returns the entry for a character's first byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0 -> 1; 0xa1..0xaf -> 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xf0..0xfe -> 2; 0xff -> 1 */
+};
+
+static int /* length of the character whose first byte is *p, per EncLen_EUCKR */
+euckr_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCKR[*p]; /* assumes UChar is an unsigned 8-bit type so *p stays within the 256 entries -- TODO confirm */
+}
+
+static OnigCodePoint /* forwards to the shared onigenc_mbn_mbc_to_code helper with the EUC-KR encoding bound */
+euckr_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
+}
+
+static int /* forwards to the shared onigenc_mb2_code_to_mbc helper with the EUC-KR encoding bound */
+euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
+}
+
+static int /* forwards to the shared onigenc_mbn_mbc_to_normalize helper with the EUC-KR encoding bound */
+euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag,
+ pp, end, lower);
+}
+
+static int /* forwards to the shared onigenc_mbn_is_mbc_ambiguous helper with the EUC-KR encoding bound */
+euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
+}
+
+static int /* forwards to the shared onigenc_mb2_is_code_ctype helper with the EUC-KR encoding bound */
+euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
+}
+
+#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) /* true for every byte outside 0xa1..0xfe; evaluates c twice */
+
+static UChar* /* returns a pointer <= s found by re-synchronising from the bytes left of s; returns s itself when s <= start */
+euckr_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* Assumed in this encoding,
+ mb-trail bytes don't mix with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* nothing to the left to inspect */
+ p = s;
+
+ while (!euckr_islead(*p) && p > start) p--; /* back up over bytes in 0xa1..0xfe until a byte outside that range, or start */
+ len = enc_len(ONIG_ENCODING_EUC_KR, p); /* enc_len is defined outside this file */
+ if (p + len > s) return (UChar* )p; /* the character at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* advance by the remaining distance rounded down to an even number of bytes */
+}
+
+static int /* TRUE when *s is <= 0x7e, FALSE otherwise; `end` is not read */
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ const UChar c = *s;
+ if (c <= 0x7e) return TRUE;
+ else return FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_KR = { /* descriptor for EUC-KR; slot order is dictated by OnigEncodingType, declared outside this file */
+ euckr_mbc_enc_len,
+ "EUC-KR", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euckr_mbc_to_code,
+ onigenc_mb2_code_to_mbclen, /* shared 2-byte helper used directly, no EUC-KR wrapper */
+ euckr_code_to_mbc,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euckr_left_adjust_char_head,
+ euckr_is_allowed_reverse_match
+};
+
+/* Same as OnigEncodingEUC_KR except for the name; the euckr_* callbacks pass ONIG_ENCODING_EUC_KR to the shared helpers */
+OnigEncodingType OnigEncodingEUC_CN = {
+ euckr_mbc_enc_len,
+ "EUC-CN", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euckr_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbc,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euckr_left_adjust_char_head,
+ euckr_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c
new file mode 100644
index 0000000..6f396e7
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/euc_tw.c
@@ -0,0 +1,144 @@
+/**********************************************************************
+ euc_tw.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_EUCTW[] = { /* 256 entries, one per byte value; euctw_mbc_enc_len returns the entry for a character's first byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, /* 0x80: 0x8e -> 4, 0x8f -> 1 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xa0 -> 1; 0xa1..0xaf -> 2 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /* 0xf0..0xfe -> 2; 0xff -> 1 */
+};
+
+static int /* length of the character whose first byte is *p, per EncLen_EUCTW */
+euctw_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCTW[*p]; /* assumes UChar is an unsigned 8-bit type so *p stays within the 256 entries -- TODO confirm */
+}
+
+static OnigCodePoint /* forwards to the shared onigenc_mbn_mbc_to_code helper with the EUC-TW encoding bound */
+euctw_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
+}
+
+static int /* forwards to the shared onigenc_mb4_code_to_mbc helper with the EUC-TW encoding bound */
+euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
+}
+
+static int /* forwards to the shared onigenc_mbn_mbc_to_normalize helper with the EUC-TW encoding bound */
+euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
+ pp, end, lower);
+}
+
+static int /* forwards to the shared onigenc_mbn_is_mbc_ambiguous helper with the EUC-TW encoding bound */
+euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end);
+}
+
+static int /* forwards to the shared onigenc_mb4_is_code_ctype helper with the EUC-TW encoding bound */
+euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
+}
+
+#define euctw_islead(c) ((c) < 0xa1 || (c) == 0xff) /* true for every byte outside 0xa1..0xfe; 0x8e counts, since EncLen_EUCTW makes it the head of a 4-byte character */
+
+static UChar* /* returns a pointer <= s found by re-synchronising from the bytes left of s; returns s itself when s <= start */
+euctw_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ /* Assumed in this encoding,
+ mb-trail bytes don't mix with single bytes.
+ */
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s; /* nothing to the left to inspect */
+ p = s;
+
+ while (!euctw_islead(*p) && p > start) p--; /* back up over bytes in 0xa1..0xfe; must stop on 0x8e, otherwise a 4-byte character is entered one byte too early and the even-step skip below lands inside it */
+ len = enc_len(ONIG_ENCODING_EUC_TW, p); /* enc_len is defined outside this file */
+ if (p + len > s) return (UChar* )p; /* the character at p (1, 2 or 4 bytes) covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1)); /* what remains after p is a run of 0xa1..0xfe bytes: advance in whole 2-byte steps */
+}
+
+static int /* TRUE when *s is <= 0x7e, FALSE otherwise; `end` is not read */
+euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ const UChar c = *s;
+ if (c <= 0x7e) return TRUE;
+ else return FALSE;
+}
+
+OnigEncodingType OnigEncodingEUC_TW = { /* descriptor for EUC-TW; slot order is dictated by OnigEncodingType, declared outside this file */
+ euctw_mbc_enc_len,
+ "EUC-TW", /* name */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ euctw_mbc_to_code,
+ onigenc_mb4_code_to_mbclen, /* shared 4-byte helper used directly, no EUC-TW wrapper */
+ euctw_code_to_mbc,
+ euctw_mbc_to_normalize,
+ euctw_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euctw_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euctw_left_adjust_char_head,
+ euctw_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c
new file mode 100644
index 0000000..01995ea
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/gb18030.c
@@ -0,0 +1,501 @@
+/**********************************************************************
+ gb18030.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2005 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Debug trace switch.  With "#if 1" DEBUG_GB18030(arg) expands to nothing;
+ * switch to "#if 0" to make it expand to "printf arg".  Because arg is pasted
+ * directly after printf, call sites wrap the whole printf argument list in
+ * an extra pair of parentheses.
+ */
+#if 1
+#define DEBUG_GB18030(arg)
+#else
+#define DEBUG_GB18030(arg) printf arg
+#endif
+
+/* Byte classes stored in GB18030_MAP, one per possible byte value. */
+enum {
+ C1, /* one-byte char */
+ C2, /* one-byte or second of two-byte char */
+ C4, /* one-byte or second or fourth of four-byte char */
+ CM /* first of two- or four-byte char or second of two-byte char */
+};
+
+/*
+ * Byte-class table, read as GB18030_MAP[byte].  16 rows of 16 entries cover
+ * byte values 0x00-0xff:
+ *   0x30-0x39             C4
+ *   0x40-0x7e and 0x80    C2
+ *   0x81-0xfe             CM
+ *   everything else       C1
+ */
+static const char GB18030_MAP[] = {
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+ C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
+ C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
+};
+
+/*
+ * Byte length of the character starting at p: 1, 2 or 4.
+ *
+ * A first byte that is not class CM is a one-byte character.  Otherwise the
+ * class of the following byte decides: C4 gives 4, C1 gives 1 (illegal
+ * sequence), any other class gives 2.  The byte at p[1] is read whenever
+ * p[0] is class CM; there is no end pointer to bound that read.
+ */
+static int
+gb18030_mbc_enc_len(const UChar* p)
+{
+  if (GB18030_MAP[p[0]] == CM) {
+    switch (GB18030_MAP[p[1]]) {
+    case C4:
+      return 4;
+    case C1:
+      return 1; /* illegal sequence */
+    default:
+      return 2;
+    }
+  }
+  return 1;
+}
+
+/*
+ * Decode the character at p into a code point by forwarding p and end to
+ * onigenc_mbn_mbc_to_code() with ONIG_ENCODING_GB18030; the helper's result
+ * is returned unchanged.
+ */
+static OnigCodePoint
+gb18030_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
+}
+
+/*
+ * Encode code into buf by forwarding both to onigenc_mb4_code_to_mbc() with
+ * ONIG_ENCODING_GB18030; the helper's return value is passed through.
+ */
+static int
+gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf);
+}
+
+/*
+ * Case normalization for GB18030: forwards flag, pp, end and lower to
+ * onigenc_mbn_mbc_to_normalize() with ONIG_ENCODING_GB18030 and returns the
+ * helper's result.
+ */
+static int
+gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag,
+ pp, end, lower);
+}
+
+/*
+ * Case-ambiguity test for GB18030: forwards flag, pp and end to
+ * onigenc_mbn_is_mbc_ambiguous() with ONIG_ENCODING_GB18030 and returns the
+ * helper's result.
+ */
+static int
+gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
+}
+
+/*
+ * Character-type test for a GB18030 code point: forwards code and ctype to
+ * onigenc_mb4_is_code_ctype() with ONIG_ENCODING_GB18030 and returns the
+ * helper's result.
+ */
+static int
+gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype);
+}
+
+/*
+ * States of the backward scanner in gb18030_left_adjust_char_head().
+ * Each name spells the run of byte classes (C2 / C4 / CM from GB18030_MAP)
+ * collected so far while walking left from s; the comments on the matching
+ * "case" labels in that function show example runs.
+ * NOTE(review): "odd"/"even" presumably refer to the parity of the number of
+ * repeated units seen (CM bytes, "CM C4" pairs, "C4 CM" pairs) -- confirm
+ * against the transitions before relying on it.
+ */
+enum state {
+ S_START,
+ S_one_C2,
+ S_one_C4,
+ S_one_CM,
+
+ S_odd_CM_one_CX,
+ S_even_CM_one_CX,
+
+ /* CMC4 : pair of "CM C4" */
+ S_one_CMC4,
+ S_odd_CMC4,
+ S_one_C4_odd_CMC4,
+ S_even_CMC4,
+ S_one_C4_even_CMC4,
+
+ S_odd_CM_odd_CMC4,
+ S_even_CM_odd_CMC4,
+
+ S_odd_CM_even_CMC4,
+ S_even_CM_even_CMC4,
+
+ /* C4CM : pair of "C4 CM" */
+ S_odd_C4CM,
+ S_one_CM_odd_C4CM,
+ S_even_C4CM,
+ S_one_CM_even_C4CM,
+
+ S_even_CM_odd_C4CM,
+ S_odd_CM_odd_C4CM,
+ S_even_CM_even_C4CM,
+ S_odd_CM_even_C4CM,
+};
+
+/*
+ * Adjust s to a character boundary by scanning backwards from s toward
+ * start.  Every byte visited is classified through GB18030_MAP and fed to
+ * the state machine below; as soon as a byte class settles the question the
+ * function returns s, s - 1, s - 2 or s - 3.  If the scan passes start
+ * without settling it, the switch after the loop returns the offset tied to
+ * the state that was reached.
+ *
+ * start: lowest address the scan reads.
+ * s:     position to adjust; the byte at s itself is the first one read.
+ */
+static UChar*
+gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ const UChar *p;
+ enum state state = S_START;
+
+ DEBUG_GB18030(("----------------\n"));
+ /* Walk left one byte per iteration; a "return" inside the switch ends the
+    scan early, a "break" keeps scanning in the new state.
+    NOTE(review): when the loop runs to completion p is decremented to
+    start - 1 before the test fails; p is not dereferenced there, but confirm
+    this is acceptable if start is the first element of its array. */
+ for (p = s; p >= start; p--) {
+ DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
+ switch (state) {
+ case S_START:
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ return (UChar *)s;
+ case C2:
+ state = S_one_C2; /* C2 */
+ break;
+ case C4:
+ state = S_one_C4; /* C4 */
+ break;
+ case CM:
+ state = S_one_CM; /* CM */
+ break;
+ }
+ break;
+ case S_one_C2: /* C2 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM C2 */
+ break;
+ }
+ break;
+ case S_one_C4: /* C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CMC4;
+ break;
+ }
+ break;
+ case S_one_CM: /* CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)s;
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM CM */
+ break;
+ }
+ break;
+
+ case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_one_CX;
+ break;
+ }
+ break;
+ case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX;
+ break;
+ }
+ break;
+
+ case S_one_CMC4: /* CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4; /* C4 CM C4 */
+ break;
+ case CM:
+ state = S_even_CM_one_CX; /* CM CM C4 */
+ break;
+ }
+ break;
+ case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_odd_CMC4: /* C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CMC4; /* CM C4 CM C4 */
+ break;
+ }
+ break;
+ case S_even_CMC4: /* CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 3);
+ case C4:
+ state = S_one_C4_even_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_even_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CM_odd_C4CM; /* CM C4 CM */
+ break;
+ }
+ break;
+ case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 2); /* |CM C4 CM */
+ case C4:
+ state = S_even_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_even_C4CM: /* C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* C4|CM C4 CM */
+ case CM:
+ state = S_one_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_odd_C4CM: /* CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|C4|CM */
+ case CM:
+ state = S_odd_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
+ case CM:
+ state = S_odd_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+ }
+ }
+
+ DEBUG_GB18030(("state %d\n", state));
+ /* Reached only when the loop ran out of bytes (p < start).  Each state
+    maps to the same offset its in-loop "return" uses. */
+ switch (state) {
+ case S_START: return (UChar *)(s - 0);
+ case S_one_C2: return (UChar *)(s - 0);
+ case S_one_C4: return (UChar *)(s - 0);
+ case S_one_CM: return (UChar *)(s - 0);
+
+ case S_odd_CM_one_CX: return (UChar *)(s - 1);
+ case S_even_CM_one_CX: return (UChar *)(s - 0);
+
+ case S_one_CMC4: return (UChar *)(s - 1);
+ case S_odd_CMC4: return (UChar *)(s - 1);
+ case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
+ case S_even_CMC4: return (UChar *)(s - 3);
+ case S_one_C4_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
+ case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
+
+ case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
+ case S_even_CM_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_C4CM: return (UChar *)(s - 0);
+ case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_C4CM: return (UChar *)(s - 2);
+ case S_one_CM_even_C4CM: return (UChar *)(s - 0);
+
+ case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
+ case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_CM_even_C4CM: return (UChar *)(s - 2);
+ case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
+ }
+
+ return (UChar* )s; /* never come here. (escape warning) */
+}
+
+/*
+ * Returns TRUE when the byte at s has class C1 in GB18030_MAP, FALSE for
+ * every other class.  The end argument is not consulted.
+ */
+static int
+gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+  if (GB18030_MAP[*s] == C1)
+    return TRUE;
+
+  return FALSE;
+}
+
+/*
+ * Encoding descriptor for GB18030.  The initializer is positional: length
+ * callback, name, maximum and minimum encoded length, case-ambiguity flag,
+ * the meta-character group (only the escape character '\\' is set; the other
+ * five are ONIG_INEFFECTIVE_META_CHAR), then the remaining callbacks.
+ * NOTE(review): the member order is fixed by OnigEncodingType, which is
+ * declared outside this file -- check it there before reordering entries.
+ */
+OnigEncodingType OnigEncodingGB18030 = {
+ gb18030_mbc_enc_len,
+ "GB18030", /* name */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ gb18030_mbc_to_code,
+ onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbc,
+ gb18030_mbc_to_normalize,
+ gb18030_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ gb18030_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ gb18030_left_adjust_char_head,
+ gb18030_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c
new file mode 100644
index 0000000..5646f26
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_1.c
@@ -0,0 +1,151 @@
+/**********************************************************************
+ iso8859_1.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Nonzero when the class mask stored for byte `code` in
+ * EncISO_8859_1_CtypeTable shares at least one bit with `ctype`.
+ * Both parameters are parenthesized in the expansion so that a compound
+ * argument such as (A | B) is masked as a whole instead of `&` binding to
+ * A alone.  `code` must be a valid index into the 256-entry table.
+ */
+#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
+  ((EncISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Character-class bit masks for ISO-8859-1, one entry per byte value
+ * (32 rows of 8).  Entries are tested against ONIGENC_CTYPE_* bits by
+ * ENC_IS_ISO_8859_1_CTYPE() and by iso_8859_1_is_mbc_ambiguous().
+ * NOTE(review): the meaning of the individual bits comes from the
+ * ONIGENC_CTYPE_* definitions outside this file.
+ */
+static const unsigned short EncISO_8859_1_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Copy the byte at *pp into *lower, lower-casing it first when the flag bit
+ * for its range is set (ASCII_CASE for ASCII bytes, NONASCII_CASE for the
+ * rest), then advance *pp by one.
+ *
+ * Returns 1, the byte length written to lower.  end is not consulted.
+ */
+static int
+iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's range. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  *pp = src + 1;
+  return 1; /* byte length of the converted char */
+}
+
+/*
+ * Report whether the byte at *pp has a case counterpart, and advance *pp by
+ * one byte in every case.
+ *
+ * The byte is examined only when the flag bit for its range is set
+ * (ASCII_CASE for ASCII bytes, NONASCII_CASE for the rest); otherwise the
+ * result is FALSE.  Lower-case letters yield TRUE except those that cannot
+ * be converted; any other byte yields TRUE only when it is an upper-case
+ * letter.  end is not consulted.
+ */
+static int
+iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_1_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'.  The previous '|' made this condition
+       always true, so digits and punctuation were reported as ambiguous
+       and the final return below could never run. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf, 0xaa, 0xb5, 0xba are lower case letters, but can't convert. */
+      if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if the UPPER bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Character-type test for ISO-8859-1.  Code points of 256 and above have no
+ * entry in the class table and always yield FALSE; the rest are looked up
+ * through ENC_IS_ISO_8859_1_CTYPE().
+ */
+static int
+iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-1.  The initializer is positional: length
+ * callback, name, maximum and minimum encoded length (both 1), the
+ * case-ambiguity flags (ASCII and non-ASCII), the meta-character group (only
+ * the escape character '\\' is set; the other five are
+ * ONIG_INEFFECTIVE_META_CHAR), then the remaining callbacks.
+ * NOTE(review): the member order is fixed by OnigEncodingType, which is
+ * declared outside this file -- check it there before reordering entries.
+ */
+OnigEncodingType OnigEncodingISO_8859_1 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-1", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_1_mbc_to_normalize,
+ iso_8859_1_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_1_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c
new file mode 100644
index 0000000..8081ef8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_10.c
@@ -0,0 +1,300 @@
+/**********************************************************************
+ iso8859_10.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case mapping of byte c: a lookup in EncISO_8859_10_ToLowerCaseTable.
+   c must be a valid index into that 256-entry table. */
+#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
+/*
+ * Nonzero when the class mask stored for byte `code` in
+ * EncISO_8859_10_CtypeTable shares at least one bit with `ctype`.
+ * Both parameters are parenthesized in the expansion so that a compound
+ * argument such as (A | B) is masked as a whole instead of `&` binding to
+ * A alone.  `code` must be a valid index into the 256-entry table.
+ */
+#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
+  ((EncISO_8859_10_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Lower-case mapping for ISO-8859-10, one entry per byte value, written in
+ * octal (32 rows of 8).  Bytes that change:
+ *   0x41-0x5a                       -> 0x61-0x7a
+ *   0xa1-0xa6, 0xa8-0xac, 0xae-0xaf -> same byte + 0x10
+ *   0xc0-0xde                       -> same byte + 0x20
+ * Every other byte maps to itself.
+ */
+static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247',
+ '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bit masks for ISO-8859-10, one entry per byte value
+ * (32 rows of 8).  Entries are tested against ONIGENC_CTYPE_* bits by
+ * ENC_IS_ISO_8859_10_CTYPE() and by iso_8859_10_is_mbc_ambiguous().
+ * NOTE(review): the meaning of the individual bits comes from the
+ * ONIGENC_CTYPE_* definitions outside this file.
+ */
+static const unsigned short EncISO_8859_10_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Copy the byte at *pp into *lower, lower-casing it first when the flag bit
+ * for its range is set (ASCII_CASE for ASCII bytes, NONASCII_CASE for the
+ * rest), then advance *pp by one.
+ *
+ * Returns 1, the byte length written to lower.  end is not consulted.
+ */
+static int
+iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
+                             const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this byte's range. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  *pp = src + 1;
+  return 1; /* byte length of the converted char */
+}
+
+/*
+ * Report whether the byte at *pp has a case counterpart, and advance *pp by
+ * one byte in every case.
+ *
+ * The byte is examined only when the flag bit for its range is set
+ * (ASCII_CASE for ASCII bytes, NONASCII_CASE for the rest); otherwise the
+ * result is FALSE.  Lower-case letters yield TRUE except 0xdf, which cannot
+ * be converted; any other byte yields TRUE only when it is an upper-case
+ * letter.  end is not consulted.
+ */
+static int
+iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
+                             const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_10_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'.  The previous '|' made this condition
+       always true, so digits and punctuation were reported as ambiguous
+       and the final return below could never run. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if the UPPER bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Character-type test for ISO-8859-10.  Code points of 256 and above have no
+ * entry in the class table and always yield FALSE; the rest are looked up
+ * through ENC_IS_ISO_8859_10_CTYPE().
+ */
+static int
+iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case pairs selected by flag.
+ *
+ * flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE:    *ccs is set to
+ *     OnigAsciiPairAmbigCodes and 52 is returned.
+ * flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: *ccs is set to the local
+ *     table below and its entry count is returned.
+ * any other value (including both bits together): *ccs is left untouched
+ *     and 0 is returned.
+ *
+ * The local table lists every non-ASCII pair in both directions.
+ */
+static int
+iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                     const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    /* 0xa1-0xaf -> 0xb1-0xbf (0xa7 and 0xad have no pair) */
+    { 0xa1, 0xb1 }, { 0xa2, 0xb2 }, { 0xa3, 0xb3 }, { 0xa4, 0xb4 },
+    { 0xa5, 0xb5 }, { 0xa6, 0xb6 }, { 0xa8, 0xb8 }, { 0xa9, 0xb9 },
+    { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc }, { 0xae, 0xbe },
+    { 0xaf, 0xbf },
+
+    /* 0xb1-0xbf -> 0xa1-0xaf (0xb7 and 0xbd have no pair) */
+    { 0xb1, 0xa1 }, { 0xb2, 0xa2 }, { 0xb3, 0xa3 }, { 0xb4, 0xa4 },
+    { 0xb5, 0xa5 }, { 0xb6, 0xa6 }, { 0xb8, 0xa8 }, { 0xb9, 0xa9 },
+    { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac }, { 0xbe, 0xae },
+    { 0xbf, 0xaf },
+
+    /* 0xc0-0xcf -> 0xe0-0xef */
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    /* 0xd0-0xde -> 0xf0-0xfe */
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    /* 0xe0-0xef -> 0xc0-0xcf */
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    /* 0xf0-0xfe -> 0xd0-0xde */
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+  int n = 0;
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    /* NOTE(review): 52 is presumably the length of OnigAsciiPairAmbigCodes,
+       which is defined outside this file -- confirm. */
+    *ccs = OnigAsciiPairAmbigCodes;
+    n = 52;
+  }
+  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    n = sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+
+  return n;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-10.  The initializer is positional:
+ * length callback, name, maximum and minimum encoded length (both 1), the
+ * case-ambiguity flags (ASCII and non-ASCII), the meta-character group (only
+ * the escape character '\\' is set; the other five are
+ * ONIG_INEFFECTIVE_META_CHAR), then the remaining callbacks.
+ * NOTE(review): the member order is fixed by OnigEncodingType, which is
+ * declared outside this file -- check it there before reordering entries.
+ */
+OnigEncodingType OnigEncodingISO_8859_10 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-10", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_10_mbc_to_normalize,
+ iso_8859_10_is_mbc_ambiguous,
+ iso_8859_10_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_10_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c
new file mode 100644
index 0000000..de9bb3b
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_11.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_11.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Non-zero when the ctype-table entry for byte `code` has at least one of
+ * the bits in `ctype` set.  `code` indexes a 256-entry table, so callers
+ * must keep it below 256.  Both arguments are parenthesized so that an
+ * expression argument such as (A | B) expands with the intended precedence.
+ */
+#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
+  ((EncISO_8859_11_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Character-type bitmask for each of the 256 byte values of ISO-8859-11,
+ * indexed by the byte itself and tested through ENC_IS_ISO_8859_11_CTYPE.
+ * Entries that are 0x0000 match no character class at all.
+ * NOTE(review): the individual bits presumably correspond to the
+ * ONIGENC_CTYPE_* masks from the project headers -- confirm there.
+ */
+static const unsigned short EncISO_8859_11_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Tests whether code point `code` has any of the character-class bits in
+ * `ctype`.  Code points of 256 and above lie outside the ctype table and
+ * never match.
+ */
+static int
+iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+
+  return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-11.
+ *
+ * A single-byte encoding (max and min encoded length are both 1).  Only the
+ * ASCII case-ambiguity mode is enabled, so normalization, ambiguity testing
+ * and the pair table use the generic onigenc_ascii_* helpers; the ctype
+ * lookup (iso_8859_11_is_code_ctype) is the only function specific to this
+ * encoding.
+ *
+ * NOTE(review): the initializer is positional, so the slot order has to
+ * match the OnigEncodingType declaration, which is not visible here.
+ */
+OnigEncodingType OnigEncodingISO_8859_11 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-11", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_11_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c
new file mode 100644
index 0000000..69316ed
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_13.c
@@ -0,0 +1,268 @@
+/**********************************************************************
+ iso8859_13.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte `c`, looked up in the 256-entry folding table. */
+#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
+/*
+ * Non-zero when the ctype-table entry for byte `code` has at least one of
+ * the bits in `ctype` set.  `code` indexes a 256-entry table, so callers
+ * must keep it below 256.  Both arguments are parenthesized so that an
+ * expression argument such as (A | B) expands with the intended precedence.
+ */
+#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
+  ((EncISO_8859_13_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Lower-case folding table for ISO-8859-13, indexed by byte value.
+ * Bytes without a lower-case counterpart map to themselves.  Besides
+ * 'A'..'Z' and the 0xc0..0xde block (except 0xd7, which maps to itself),
+ * the bytes 0xa8, 0xaa and 0xaf fold to 0xb8, 0xba and 0xbf.
+ */
+static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-type bitmask for each of the 256 byte values of ISO-8859-13,
+ * indexed by the byte itself.  Read through ENC_IS_ISO_8859_13_CTYPE and,
+ * in is_mbc_ambiguous, masked with ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER.
+ * NOTE(review): the meaning of the remaining bits presumably follows the
+ * other ONIGENC_CTYPE_* masks from the project headers -- confirm there.
+ */
+static const unsigned short EncISO_8859_13_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0
+};
+
+/*
+ * Writes the normalized (case-folded) form of the byte at *pp to *lower and
+ * advances *pp by one byte.
+ *
+ * An ASCII byte is folded only when `flag` contains
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE; any other byte is folded only when it
+ * contains ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.  Otherwise the byte is
+ * copied unchanged.  `end` is not used.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the ambiguity mode that governs this byte. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Reports whether the single-byte character at *pp has a case counterpart
+ * under the ambiguity modes selected by `flag`, and advances *pp by one
+ * byte in every case.
+ *
+ * flag: bit set of ONIGENC_AMBIGUOUS_MATCH_* modes.  The ASCII mode governs
+ *       ASCII bytes, the non-ASCII mode governs all other bytes; a byte
+ *       whose mode is not selected is never ambiguous.
+ * pp:   in/out cursor, always moved past the examined byte.
+ * end:  not used.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters other than
+ * 0xdf and 0xb5, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_13_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* This has to be a bit test.  `v | ONIGENC_CTYPE_LOWER` is non-zero
+       for every byte, which made digits, punctuation and control bytes
+       report as ambiguous and left the final return unreachable. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf, 0xb5 are lower case letter, but can't convert. */
+      if (*p == 0xdf || *p == 0xb5)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if the upper-case bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Tests whether code point `code` has any of the character-class bits in
+ * `ctype`.  Code points of 256 and above lie outside the ctype table and
+ * never match.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+
+  return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
+}
+
+/*
+ * Hands back the table of case-equivalent code pairs for one ambiguity mode.
+ *
+ * flag: exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or exactly
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value (including
+ *       the two OR-ed together) yields no table.
+ * ccs:  out parameter, set to the selected table; left untouched when 0 is
+ *       returned.
+ *
+ * Returns the number of entries in *ccs, or 0 when `flag` selects no table.
+ *
+ * Each pair is listed in both directions.  The 0xa8/0xb8, 0xaa/0xba and
+ * 0xaf/0xbf pairs are included because the lower-case table folds them and
+ * the ctype table marks them upper/lower; without them this table disagreed
+ * with mbc_to_normalize for those letters.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa8, 0xb8 },
+    { 0xaa, 0xba },
+    { 0xaf, 0xbf },
+
+    { 0xb8, 0xa8 },
+    { 0xba, 0xaa },
+    { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    /* NOTE(review): 52 is presumably the entry count of
+       OnigAsciiPairAmbigCodes (26 letters, both directions) -- confirm
+       against its definition. */
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-13.
+ *
+ * A single-byte encoding (max and min encoded length are both 1) with both
+ * the ASCII and the non-ASCII case-ambiguity modes enabled.  Only the
+ * escape meta character is set; the remaining meta-character slots are
+ * ONIG_INEFFECTIVE_META_CHAR.  Length and code conversion use the generic
+ * onigenc_single_byte_* helpers, while normalization, ambiguity testing,
+ * the pair table and the ctype lookup use the static functions above.
+ *
+ * NOTE(review): the initializer is positional, so the slot order has to
+ * match the OnigEncodingType declaration, which is not visible here.
+ */
+OnigEncodingType OnigEncodingISO_8859_13 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-13", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c
new file mode 100644
index 0000000..44638cf
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_14.c
@@ -0,0 +1,298 @@
+/**********************************************************************
+ iso8859_14.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case form of byte `c`, looked up in the 256-entry folding table. */
+#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
+/*
+ * Non-zero when the ctype-table entry for byte `code` has at least one of
+ * the bits in `ctype` set.  `code` indexes a 256-entry table, so callers
+ * must keep it below 256.  Both arguments are parenthesized so that an
+ * expression argument such as (A | B) expands with the intended precedence.
+ */
+#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
+  ((EncISO_8859_14_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Lower-case folding table for ISO-8859-14, indexed by byte value.
+ * Bytes without a lower-case counterpart map to themselves.  In the
+ * 0xa0..0xbf range the upper/lower pairs are not at a fixed distance
+ * (for example 0xa1 -> 0xa2, 0xa6 -> 0xab, 0xaf -> 0xff), so each one is
+ * spelled out individually; 0xc0..0xde fold to 0xe0..0xfe and 0xdf maps
+ * to itself.
+ */
+static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247',
+ '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377',
+ '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271',
+ '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-type bitmask for each of the 256 byte values of ISO-8859-14,
+ * indexed by the byte itself.  Read through ENC_IS_ISO_8859_14_CTYPE and,
+ * in is_mbc_ambiguous, masked with ONIGENC_CTYPE_UPPER / ONIGENC_CTYPE_LOWER.
+ * NOTE(review): the meaning of the remaining bits presumably follows the
+ * other ONIGENC_CTYPE_* masks from the project headers -- confirm there.
+ */
+static const unsigned short EncISO_8859_14_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2,
+ 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Writes the normalized (case-folded) form of the byte at *pp to *lower and
+ * advances *pp by one byte.
+ *
+ * An ASCII byte is folded only when `flag` contains
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE; any other byte is folded only when it
+ * contains ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.  Otherwise the byte is
+ * copied unchanged.  `end` is not used.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the ambiguity mode that governs this byte. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*src);
+  else
+    *lower = *src;
+
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Reports whether the single-byte character at *pp has a case counterpart
+ * under the ambiguity modes selected by `flag`, and advances *pp by one
+ * byte in every case.
+ *
+ * flag: bit set of ONIGENC_AMBIGUOUS_MATCH_* modes.  The ASCII mode governs
+ *       ASCII bytes, the non-ASCII mode governs all other bytes; a byte
+ *       whose mode is not selected is never ambiguous.
+ * pp:   in/out cursor, always moved past the examined byte.
+ * end:  not used.
+ *
+ * Returns TRUE for upper-case letters and for lower-case letters other than
+ * 0xdf, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_14_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* This has to be a bit test.  `v | ONIGENC_CTYPE_LOWER` is non-zero
+       for every byte, which made digits, punctuation and control bytes
+       report as ambiguous and left the final return unreachable. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if the upper-case bit is set. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Tests whether code point `code` has any of the character-class bits in
+ * `ctype`.  Code points of 256 and above lie outside the ctype table and
+ * never match.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+
+  return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
+}
+
+/*
+ * Hands back the table of case-equivalent code pairs for one ambiguity mode.
+ *
+ * flag: exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or exactly
+ *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value yields no
+ *       table.
+ * ccs:  out parameter, set to the selected table; left untouched when 0 is
+ *       returned.
+ *
+ * Returns the number of entries in *ccs, or 0 when `flag` selects no table.
+ * Each non-ASCII pair is listed in both directions.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes pairs[] = {
+    { 0xa1, 0xa2 },
+    { 0xa2, 0xa1 },
+    { 0xa4, 0xa5 },
+    { 0xa5, 0xa4 },
+    { 0xa6, 0xab },
+    { 0xa8, 0xb8 },
+    { 0xaa, 0xba },
+    { 0xab, 0xa6 },
+    { 0xac, 0xbc },
+    { 0xaf, 0xff },
+
+    { 0xb0, 0xb1 },
+    { 0xb1, 0xb0 },
+    { 0xb2, 0xb3 },
+    { 0xb3, 0xb2 },
+    { 0xb4, 0xb5 },
+    { 0xb5, 0xb4 },
+    { 0xb7, 0xb9 },
+    { 0xb8, 0xa8 },
+    { 0xb9, 0xb7 },
+    { 0xba, 0xaa },
+    { 0xbb, 0xbf },
+    { 0xbc, 0xac },
+    { 0xbd, 0xbe },
+    { 0xbe, 0xbd },
+    { 0xbf, 0xbb },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde },
+    { 0xff, 0xaf }
+  };
+
+  /* ASCII mode: hand out the shared ASCII pair table. */
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+
+  /* Anything other than the non-ASCII mode selects no table. */
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
+    return 0;
+
+  *ccs = pairs;
+  return sizeof(pairs) / sizeof(pairs[0]);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-14.
+ *
+ * A single-byte encoding (max and min encoded length are both 1) with both
+ * the ASCII and the non-ASCII case-ambiguity modes enabled.  Only the
+ * escape meta character is set; the remaining meta-character slots are
+ * ONIG_INEFFECTIVE_META_CHAR.  Length and code conversion use the generic
+ * onigenc_single_byte_* helpers, while normalization, ambiguity testing,
+ * the pair table and the ctype lookup use the static functions above.
+ *
+ * NOTE(review): the initializer is positional, so the slot order has to
+ * match the OnigEncodingType declaration, which is not visible here.
+ */
+OnigEncodingType OnigEncodingISO_8859_14 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-14", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c
new file mode 100644
index 0000000..f643b89
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_15.c
@@ -0,0 +1,279 @@
+/**********************************************************************
+ iso8859_15.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte `c`, looked up in the 256-entry table. */
+#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]
+/*
+ * Evaluates to 1 when the class bitmask stored for `code` shares at least
+ * one bit with `ctype`, else 0.  `ctype` is parenthesized so that a compound
+ * argument such as (A | B) is tested as a whole instead of being split by
+ * operator precedence.  `code` indexes the 256-entry table directly, so the
+ * caller must keep it below 256.
+ */
+#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
+  ((EncISO_8859_15_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Byte -> lower-case byte map for ISO-8859-15, indexed by the byte value
+ * and written as octal escapes, eight entries per row.  A byte without a
+ * lower-case counterpart maps to itself.
+ *
+ * Besides the regular +0x20 shift for 0x41-0x5a and 0xc0-0xde (0xd7 maps to
+ * itself), the irregular entries are 0xa6 -> 0xa8, 0xb4 -> 0xb8,
+ * 0xbc -> 0xbd and 0xbe -> 0xff.
+ */
+static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bitmask for every ISO-8859-15 byte, indexed by the byte
+ * value, eight entries per row.  ENC_IS_ISO_8859_15_CTYPE tests an entry
+ * against a ctype mask, and is_mbc_ambiguous masks it with
+ * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER.
+ *
+ * NOTE(review): the meaning of the individual bits comes from the
+ * ONIGENC_CTYPE_* definitions, which are not visible in this file.
+ */
+static const unsigned short EncISO_8859_15_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Write the normalized (lower-cased) form of the byte at *pp into *lower
+ * and step *pp past it.
+ *
+ * The byte is folded through the lower-case table only when `flag` carries
+ * the ambiguity bit for its range: the ASCII bit for ASCII bytes, the
+ * non-ASCII bit for the rest.  Otherwise it is copied unchanged.
+ * `end` is not used.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0;
+  else
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0;
+
+  *lower = fold ? ENC_ISO_8859_15_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Tell whether the byte at *pp is case-ambiguous under `flag`, and step
+ * *pp past it.
+ *
+ * flag: ambiguity bits; the byte is only classified when the bit for its
+ *       range (ASCII or non-ASCII) is set.
+ * pp:   in/out cursor, always advanced by one byte.
+ * end:  not used.
+ *
+ * Returns TRUE for an upper-case letter and for a lower-case letter that
+ * can be converted, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_15_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; OR-ing it in would make the condition
+       true for every byte, digits and punctuation included. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf etc.. are lower case letter, but can't convert. */
+      if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Check whether code point `code` belongs to any of the classes in `ctype`.
+ * Code points of 256 and above are outside the table and yield FALSE.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case pairs selected by `flag`.
+ *
+ * flag: must equal exactly one of the two ambiguity flags.
+ * ccs:  receives a pointer to the selected table; left untouched when
+ *       `flag` matches neither value.
+ *
+ * Returns the number of entries in the table: 52 for the shared ASCII
+ * table (presumably its length; it is defined elsewhere), the size of the
+ * local table for the non-ASCII case, and 0 for any other `flag`.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag,
+                         const OnigPairAmbigCodes** ccs)
+{
+  /* Each non-ASCII letter that has a case counterpart, in both directions. */
+  static const OnigPairAmbigCodes pairs[] = {
+    { 0xa6, 0xa8 }, { 0xa8, 0xa6 },
+
+    { 0xb4, 0xb8 }, { 0xb8, 0xb4 }, { 0xbc, 0xbd }, { 0xbd, 0xbc },
+    { 0xbe, 0xff },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc },
+    { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc },
+    { 0xfd, 0xdd }, { 0xfe, 0xde }, { 0xff, 0xbe }
+  };
+  int count = 0;
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    count = 52;
+  }
+  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = pairs;
+    count = (int )(sizeof(pairs) / sizeof(pairs[0]));
+  }
+
+  return count;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-15.
+ *
+ * The encoding is single-byte: both the maximum and the minimum encoded
+ * length are 1.  Both the ASCII and the non-ASCII case ambiguity flags are
+ * set.  Only the escape meta character is defined (backslash); every other
+ * meta-character slot holds ONIG_INEFFECTIVE_META_CHAR.
+ *
+ * The onigenc_* entries are shared helpers; mbc_to_normalize,
+ * is_mbc_ambiguous, get_all_pair_ambig_codes and is_code_ctype are the
+ * static functions defined above in this file.
+ */
+OnigEncodingType OnigEncodingISO_8859_15 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-15", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c
new file mode 100644
index 0000000..921ae36
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_16.c
@@ -0,0 +1,292 @@
+/**********************************************************************
+ iso8859_16.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte `c`, looked up in the 256-entry table. */
+#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
+/*
+ * Evaluates to 1 when the class bitmask stored for `code` shares at least
+ * one bit with `ctype`, else 0.  `ctype` is parenthesized so that a compound
+ * argument such as (A | B) is tested as a whole instead of being split by
+ * operator precedence.  `code` indexes the 256-entry table directly, so the
+ * caller must keep it below 256.
+ */
+#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
+  ((EncISO_8859_16_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Byte -> lower-case byte map for ISO-8859-16, indexed by the byte value
+ * and written as octal escapes, eight entries per row.  A byte without a
+ * lower-case counterpart maps to itself.
+ *
+ * Besides the regular +0x20 shift for 0x41-0x5a and 0xc0-0xde, the
+ * irregular entries are 0xa1 -> 0xa2, 0xa3 -> 0xb3, 0xa6 -> 0xa8,
+ * 0xaa -> 0xba, 0xac -> 0xae, 0xaf -> 0xbf, 0xb2 -> 0xb9, 0xb4 -> 0xb8,
+ * 0xbc -> 0xbd and 0xbe -> 0xff.
+ *
+ * Entry 0xa4 maps to itself: the ctype table gives that byte neither the
+ * upper- nor the lower-case bit and the case-pair table has no entry for
+ * it, so folding it onto 0xa5 would make two unrelated symbols compare
+ * equal in case-insensitive matching.
+ */
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247',
+ '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277',
+ '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bitmask for every ISO-8859-16 byte, indexed by the byte
+ * value, eight entries per row.  ENC_IS_ISO_8859_16_CTYPE tests an entry
+ * against a ctype mask, and is_mbc_ambiguous masks it with
+ * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER.
+ *
+ * NOTE(review): the meaning of the individual bits comes from the
+ * ONIGENC_CTYPE_* definitions, which are not visible in this file.
+ */
+static const unsigned short EncISO_8859_16_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2,
+ 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+/*
+ * Write the normalized (lower-cased) form of the byte at *pp into *lower
+ * and step *pp past it.
+ *
+ * The byte is folded through the lower-case table only when `flag` carries
+ * the ambiguity bit for its range: the ASCII bit for ASCII bytes, the
+ * non-ASCII bit for the rest.  Otherwise it is copied unchanged.
+ * `end` is not used.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+mbc_to_normalize(OnigAmbigType flag,
+                 const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0;
+  else
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0;
+
+  *lower = fold ? ENC_ISO_8859_16_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Tell whether the byte at *pp is case-ambiguous under `flag`, and step
+ * *pp past it.
+ *
+ * flag: ambiguity bits; the byte is only classified when the bit for its
+ *       range (ASCII or non-ASCII) is set.
+ * pp:   in/out cursor, always advanced by one byte.
+ * end:  not used.
+ *
+ * Returns TRUE for an upper-case letter and for a lower-case letter that
+ * can be converted, FALSE for everything else.
+ */
+static int
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_16_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; OR-ing it in would make the condition
+       true for every byte, digits and punctuation included. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Check whether code point `code` belongs to any of the classes in `ctype`.
+ * Code points of 256 and above are outside the table and yield FALSE.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case pairs selected by `flag`.
+ *
+ * flag: must equal exactly one of the two ambiguity flags.
+ * ccs:  receives a pointer to the selected table; left untouched when
+ *       `flag` matches neither value.
+ *
+ * Returns the number of entries in the table: 52 for the shared ASCII
+ * table (presumably its length; it is defined elsewhere), the size of the
+ * local table for the non-ASCII case, and 0 for any other `flag`.
+ */
+static int
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+{
+  /* Each non-ASCII letter that has a case counterpart, in both directions. */
+  static const OnigPairAmbigCodes pairs[] = {
+    { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa3, 0xb3 }, { 0xa6, 0xa8 },
+    { 0xa8, 0xa6 }, { 0xaa, 0xba }, { 0xac, 0xae }, { 0xae, 0xac },
+    { 0xaf, 0xbf },
+
+    { 0xb2, 0xb9 }, { 0xb3, 0xa3 }, { 0xb4, 0xb8 }, { 0xb8, 0xb4 },
+    { 0xb9, 0xb2 }, { 0xba, 0xaa }, { 0xbc, 0xbd }, { 0xbd, 0xbc },
+    { 0xbe, 0xff }, { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
+    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
+    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }, { 0xff, 0xbe }
+  };
+  int count = 0;
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    count = 52;
+  }
+  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = pairs;
+    count = (int )(sizeof(pairs) / sizeof(pairs[0]));
+  }
+
+  return count;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-16.
+ *
+ * The encoding is single-byte: both the maximum and the minimum encoded
+ * length are 1.  Both the ASCII and the non-ASCII case ambiguity flags are
+ * set.  Only the escape meta character is defined (backslash); every other
+ * meta-character slot holds ONIG_INEFFECTIVE_META_CHAR.
+ *
+ * The onigenc_* entries are shared helpers; mbc_to_normalize,
+ * is_mbc_ambiguous, get_all_pair_ambig_codes and is_code_ctype are the
+ * static functions defined above in this file.
+ */
+OnigEncodingType OnigEncodingISO_8859_16 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-16", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c
new file mode 100644
index 0000000..f8cb375
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_2.c
@@ -0,0 +1,292 @@
+/**********************************************************************
+ iso8859_2.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte `c`, looked up in the 256-entry table. */
+#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
+/*
+ * Evaluates to 1 when the class bitmask stored for `code` shares at least
+ * one bit with `ctype`, else 0.  `ctype` is parenthesized so that a compound
+ * argument such as (A | B) is tested as a whole instead of being split by
+ * operator precedence.  `code` indexes the 256-entry table directly, so the
+ * caller must keep it below 256.
+ */
+#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
+  ((EncISO_8859_2_CtypeTable[code] & (ctype)) != 0)
+
+/*
+ * Byte -> lower-case byte map for ISO-8859-2, indexed by the byte value
+ * and written as octal escapes, eight entries per row.  A byte without a
+ * lower-case counterpart maps to itself.
+ *
+ * Besides the regular +0x20 shift for 0x41-0x5a and 0xc0-0xde (0xd7 maps to
+ * itself), the bytes 0xa1, 0xa3, 0xa5, 0xa6, 0xa9-0xac, 0xae and 0xaf map
+ * to the byte 0x10 above them.
+ */
+static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+/*
+ * Character-class bitmask for every ISO-8859-2 byte, indexed by the byte
+ * value, eight entries per row.  ENC_IS_ISO_8859_2_CTYPE tests an entry
+ * against a ctype mask, and iso_8859_2_is_mbc_ambiguous masks it with
+ * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER.
+ *
+ * NOTE(review): the meaning of the individual bits comes from the
+ * ONIGENC_CTYPE_* definitions, which are not visible in this file.
+ */
+static const unsigned short EncISO_8859_2_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+/*
+ * Write the normalized (lower-cased) form of the byte at *pp into *lower
+ * and step *pp past it.
+ *
+ * The byte is folded through the lower-case table only when `flag` carries
+ * the ambiguity bit for its range: the ASCII bit for ASCII bytes, the
+ * non-ASCII bit for the rest.  Otherwise it is copied unchanged.
+ * `end` is not used.
+ *
+ * Returns the number of bytes written to `lower`, which is always 1.
+ */
+static int
+iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0;
+  else
+    fold = (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0;
+
+  *lower = fold ? ENC_ISO_8859_2_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Tell whether the byte at *pp is case-ambiguous under `flag`, and step
+ * *pp past it.
+ *
+ * flag: ambiguity bits; the byte is only classified when the bit for its
+ *       range (ASCII or non-ASCII) is set.
+ * pp:   in/out cursor, always advanced by one byte.
+ * end:  not used.
+ *
+ * Returns TRUE for an upper-case letter and for a lower-case letter that
+ * can be converted, FALSE for everything else.
+ */
+static int
+iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_2_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; OR-ing it in would make the condition
+       true for every byte, digits and punctuation included. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf is lower case letter, but can't convert. */
+      if (*p == 0xdf)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    /* Not lower case: ambiguous only if it is an upper-case letter. */
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Hand out the table of case pairs selected by `flag`.
+ *
+ * flag: must equal exactly one of the two ambiguity flags.
+ * ccs:  receives a pointer to the selected table; left untouched when
+ *       `flag` matches neither value.
+ *
+ * Returns the number of entries in the table: 52 for the shared ASCII
+ * table (presumably its length; it is defined elsewhere), the size of the
+ * local table for the non-ASCII case, and 0 for any other `flag`.
+ */
+static int
+iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  /* Each non-ASCII letter that has a case counterpart, in both directions. */
+  static const OnigPairAmbigCodes pairs[] = {
+    { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, { 0xa6, 0xb6 },
+    { 0xa9, 0xb9 }, { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc },
+    { 0xae, 0xbe }, { 0xaf, 0xbf },
+
+    { 0xb1, 0xa1 }, { 0xb3, 0xa3 }, { 0xb5, 0xa5 }, { 0xb6, 0xa6 },
+    { 0xb9, 0xa9 }, { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac },
+    { 0xbe, 0xae }, { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc },
+    { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc },
+    { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+  int count = 0;
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    count = 52;
+  }
+  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = pairs;
+    count = (int )(sizeof(pairs) / sizeof(pairs[0]));
+  }
+
+  return count;
+}
+
+/*
+ * Check whether code point `code` belongs to any of the classes in `ctype`.
+ * Code points of 256 and above are outside the table and yield FALSE.
+ */
+static int
+iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-2.
+ *
+ * The encoding is single-byte: both the maximum and the minimum encoded
+ * length are 1.  Both the ASCII and the non-ASCII case ambiguity flags are
+ * set.  Only the escape meta character is defined (backslash); every other
+ * meta-character slot holds ONIG_INEFFECTIVE_META_CHAR.
+ *
+ * The onigenc_* entries are shared helpers; the iso_8859_2_* entries are
+ * the static functions defined above in this file.
+ */
+OnigEncodingType OnigEncodingISO_8859_2 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-2", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_2_mbc_to_normalize,
+ iso_8859_2_is_mbc_ambiguous,
+ iso_8859_2_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_2_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c
new file mode 100644
index 0000000..e62d20d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_3.c
@@ -0,0 +1,281 @@
+/**********************************************************************
+ iso8859_3.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Look up the lower-case form of byte value `c` in the ISO-8859-3 table. */
+#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c]
+/* Non-zero when the ctype table entry for `code` shares a bit with `ctype`.
+ * `ctype` is parenthesized so that a compound argument such as (A | B) is
+ * masked as a whole instead of being split by operator precedence. */
+#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
+  ((EncISO_8859_3_CtypeTable[code] & (ctype)) != 0)
+/* Lower-case folding table for ISO-8859-3, indexed by byte value (0-255).
+ * Each entry is the byte that ENC_ISO_8859_3_TO_LOWER_CASE yields for that
+ * index; bytes without a distinct lower-case form map to themselves. */
+static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+/* Character-type bit masks for ISO-8859-3, indexed by byte value (0-255).
+ * ENC_IS_ISO_8859_3_CTYPE ANDs an entry with a requested ctype mask, and
+ * iso_8859_3_is_mbc_ambiguous tests entries against ONIGENC_CTYPE_UPPER and
+ * ONIGENC_CTYPE_LOWER.  The individual bit assignments are not defined in
+ * this file. */
+static const unsigned short EncISO_8859_3_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2,
+ 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+/*
+ * Write to *lower the normalized form of the single byte at *pp, then
+ * advance *pp past it.  The lower-case table is applied only when `flag`
+ * carries the case flag matching the byte's class as reported by
+ * ONIGENC_IS_MBC_ASCII; otherwise the byte is copied unchanged.
+ * `end` is not used.  Always returns 1, the number of bytes stored.
+ */
+static int
+iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  const int fold = ONIGENC_IS_MBC_ASCII(src)
+    ? ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+    : ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_ISO_8859_3_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Decide whether the byte at *pp has a case-variant partner under `flag`,
+ * and advance *pp past it.  `end` is not used.
+ *
+ * Returns TRUE for upper- or lower-case letters whose class (ASCII or
+ * non-ASCII, per ONIGENC_IS_MBC_ASCII) is enabled in `flag`, except for
+ * lower-case letters that have no upper-case counterpart; FALSE otherwise.
+ */
+static int
+iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_3_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Must be a bit test ('&'): with '|' the condition is always true and
+     * every non-letter byte would be reported as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf and 0xb5 are lower case letters, but can't be converted:
+       * neither appears in this encoding's pair table. */
+      if (*p == 0xdf || *p == 0xb5)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether `code` matches any bit of `ctype` in the ISO-8859-3 ctype
+ * table.  Code points of 256 and above lie outside the 256-entry table and
+ * yield FALSE.
+ */
+static int
+iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+  return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
+}
+
+/*
+ * Hand back the list of case-variant code pairs selected by `flag`.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE     -> *ccs is set to
+ *       OnigAsciiPairAmbigCodes and 52 is returned.
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE  -> *ccs is set to the
+ *       local ISO-8859-3 table and its element count is returned.
+ *   any other value (including both flags combined) -> returns 0 and
+ *       leaves *ccs untouched.
+ *
+ * Each pair is listed in both directions.
+ */
+static int
+iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes pairs[] = {
+    { 0xa1, 0xb1 }, { 0xa6, 0xb6 }, { 0xa9, 0xb9 }, { 0xaa, 0xba },
+    { 0xab, 0xbb }, { 0xac, 0xbc }, { 0xaf, 0xbf },
+    { 0xb1, 0xa1 }, { 0xb6, 0xa6 }, { 0xb9, 0xa9 }, { 0xba, 0xaa },
+    { 0xbb, 0xab }, { 0xbc, 0xac }, { 0xbf, 0xaf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 }, { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb }, { 0xcc, 0xec },
+    { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 }, { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 }, { 0xd9, 0xf9 },
+    { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc }, { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 }, { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb }, { 0xec, 0xcc },
+    { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 }, { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 }, { 0xf9, 0xd9 },
+    { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc }, { 0xfd, 0xdd },
+    { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    return 0;
+  }
+
+  *ccs = pairs;
+  return sizeof(pairs) / sizeof(pairs[0]);
+}
+
+/*
+ * Encoding descriptor for ISO-8859-3.  Every character is exactly one byte
+ * (max and min length are both 1) and the descriptor advertises both the
+ * ASCII and the non-ASCII case ambiguity flags.
+ */
+OnigEncodingType OnigEncodingISO_8859_3 = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-3",                                  /* name */
+  1,                                             /* max enc length */
+  1,                                             /* min enc length */
+  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE),
+  {
+    (OnigCodePoint )'\\',                        /* esc */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anychar '.' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anytime '*' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* zero or one time '?' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* one or more time '+' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR   /* anychar anytime */
+  },
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  iso_8859_3_mbc_to_normalize,
+  iso_8859_3_is_mbc_ambiguous,
+  iso_8859_3_get_all_pair_ambig_codes,
+  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  iso_8859_3_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c
new file mode 100644
index 0000000..dd6bd7d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_4.c
@@ -0,0 +1,290 @@
+/**********************************************************************
+ iso8859_4.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Look up the lower-case form of byte value `c` in the ISO-8859-4 table. */
+#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c]
+/* Non-zero when the ctype table entry for `code` shares a bit with `ctype`.
+ * `ctype` is parenthesized so that a compound argument such as (A | B) is
+ * masked as a whole instead of being split by operator precedence. */
+#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
+  ((EncISO_8859_4_CtypeTable[code] & (ctype)) != 0)
+/* Lower-case folding table for ISO-8859-4, indexed by byte value (0-255).
+ * Each entry is the byte that ENC_ISO_8859_4_TO_LOWER_CASE yields for that
+ * index; bytes without a distinct lower-case form map to themselves.
+ * Note that 0xbd folds to 0xbf rather than following the +0x10 / +0x20
+ * offsets of its neighbours. */
+static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
+ '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+/* Character-type bit masks for ISO-8859-4, indexed by byte value (0-255).
+ * ENC_IS_ISO_8859_4_CTYPE ANDs an entry with a requested ctype mask, and
+ * iso_8859_4_is_mbc_ambiguous tests entries against ONIGENC_CTYPE_UPPER and
+ * ONIGENC_CTYPE_LOWER.  The individual bit assignments are not defined in
+ * this file. */
+static const unsigned short EncISO_8859_4_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+};
+
+/*
+ * Write to *lower the normalized form of the single byte at *pp, then
+ * advance *pp past it.  The lower-case table is applied only when `flag`
+ * carries the case flag matching the byte's class as reported by
+ * ONIGENC_IS_MBC_ASCII; otherwise the byte is copied unchanged.
+ * `end` is not used.  Always returns 1, the number of bytes stored.
+ */
+static int
+iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  const int fold = ONIGENC_IS_MBC_ASCII(src)
+    ? ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+    : ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_ISO_8859_4_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Decide whether the byte at *pp has a case-variant partner under `flag`,
+ * and advance *pp past it.  `end` is not used.
+ *
+ * Returns TRUE for upper- or lower-case letters whose class (ASCII or
+ * non-ASCII, per ONIGENC_IS_MBC_ASCII) is enabled in `flag`, except for
+ * lower-case letters that have no upper-case counterpart; FALSE otherwise.
+ */
+static int
+iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_4_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Must be a bit test ('&'): with '|' the condition is always true and
+     * every non-letter byte would be reported as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf and 0xa2 are lower case letters, but can't be converted:
+       * neither appears in this encoding's pair table. */
+      if (*p == 0xdf || *p == 0xa2)
+        return FALSE;
+      else
+        return TRUE;
+    }
+
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether `code` matches any bit of `ctype` in the ISO-8859-4 ctype
+ * table.  Code points of 256 and above lie outside the 256-entry table and
+ * yield FALSE.
+ */
+static int
+iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+  return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
+}
+
+/*
+ * Hand back the list of case-variant code pairs selected by `flag`.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE     -> *ccs is set to
+ *       OnigAsciiPairAmbigCodes and 52 is returned.
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE  -> *ccs is set to the
+ *       local ISO-8859-4 table and its element count is returned.
+ *   any other value (including both flags combined) -> returns 0 and
+ *       leaves *ccs untouched.
+ *
+ * Each pair is listed in both directions, ordered by the first member.
+ */
+static int
+iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, { 0xa6, 0xb6 },
+    { 0xa9, 0xb9 }, { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc },
+    { 0xae, 0xbe },
+
+    { 0xb1, 0xa1 }, { 0xb3, 0xa3 }, { 0xb5, 0xa5 }, { 0xb6, 0xa6 },
+    { 0xb9, 0xa9 }, { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac },
+    /* 0xbd <-> 0xbf: the lower-case table folds 0xbd to 0xbf, so the
+     * pair has to be listed here as well to stay consistent with it. */
+    { 0xbd, 0xbf },
+    { 0xbe, 0xae },
+    { 0xbf, 0xbd },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc },
+    { 0xdd, 0xfd }, { 0xde, 0xfe },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc },
+    { 0xfd, 0xdd }, { 0xfe, 0xde }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-4.  Every character is exactly one byte
+ * (max and min length are both 1) and the descriptor advertises both the
+ * ASCII and the non-ASCII case ambiguity flags.
+ */
+OnigEncodingType OnigEncodingISO_8859_4 = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-4",                                  /* name */
+  1,                                             /* max enc length */
+  1,                                             /* min enc length */
+  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE),
+  {
+    (OnigCodePoint )'\\',                        /* esc */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anychar '.' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anytime '*' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* zero or one time '?' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* one or more time '+' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR   /* anychar anytime */
+  },
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  iso_8859_4_mbc_to_normalize,
+  iso_8859_4_is_mbc_ambiguous,
+  iso_8859_4_get_all_pair_ambig_codes,
+  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  iso_8859_4_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c
new file mode 100644
index 0000000..87b7fb8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_5.c
@@ -0,0 +1,296 @@
+/**********************************************************************
+ iso8859_5.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Look up the lower-case form of byte value `c` in the ISO-8859-5 table. */
+#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
+/* Non-zero when the ctype table entry for `code` shares a bit with `ctype`.
+ * `ctype` is parenthesized so that a compound argument such as (A | B) is
+ * masked as a whole instead of being split by operator precedence. */
+#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
+  ((EncISO_8859_5_CtypeTable[code] & (ctype)) != 0)
+/* Lower-case folding table for ISO-8859-5, indexed by byte value (0-255).
+ * Each entry is the byte that ENC_ISO_8859_5_TO_LOWER_CASE yields for that
+ * index; bytes without a distinct lower-case form map to themselves. */
+static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+/* Character-type bit masks for ISO-8859-5, indexed by byte value (0-255).
+ * ENC_IS_ISO_8859_5_CTYPE ANDs an entry with a requested ctype mask, and
+ * iso_8859_5_is_mbc_ambiguous tests entries against ONIGENC_CTYPE_UPPER and
+ * ONIGENC_CTYPE_LOWER.  The individual bit assignments are not defined in
+ * this file. */
+static const unsigned short EncISO_8859_5_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2
+};
+
+/*
+ * Write to *lower the normalized form of the single byte at *pp, then
+ * advance *pp past it.  The lower-case table is applied only when `flag`
+ * carries the case flag matching the byte's class as reported by
+ * ONIGENC_IS_MBC_ASCII; otherwise the byte is copied unchanged.
+ * `end` is not used.  Always returns 1, the number of bytes stored.
+ */
+static int
+iso_8859_5_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  const int fold = ONIGENC_IS_MBC_ASCII(src)
+    ? ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+    : ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_ISO_8859_5_TO_LOWER_CASE(*src) : *src;
+  (*pp)++;
+  return 1;
+}
+
+/*
+ * Decide whether the byte at *pp has a case-variant partner under `flag`,
+ * and advance *pp past it.  `end` is not used.
+ *
+ * Returns TRUE when the case flag for the byte's class (ASCII or non-ASCII,
+ * per ONIGENC_IS_MBC_ASCII) is set in `flag` and the byte's ctype entry has
+ * ONIGENC_CTYPE_UPPER or ONIGENC_CTYPE_LOWER set; FALSE otherwise.
+ */
+static int
+iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  const UChar* cur = *pp;
+  int enabled;
+
+  (*pp)++;
+
+  enabled = ONIGENC_IS_MBC_ASCII(cur)
+    ? ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
+    : ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+  if (!enabled) {
+    return FALSE;
+  }
+
+  if ((EncISO_8859_5_CtypeTable[*cur] &
+       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * Report whether `code` matches any bit of `ctype` in the ISO-8859-5 ctype
+ * table.  Code points of 256 and above lie outside the 256-entry table and
+ * yield FALSE.
+ */
+static int
+iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256) {
+    return FALSE;
+  }
+  return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
+}
+
+/*
+ * Hand back the list of case-variant code pairs selected by `flag`.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE     -> *ccs is set to
+ *       OnigAsciiPairAmbigCodes and 52 is returned.
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE  -> *ccs is set to the
+ *       local ISO-8859-5 table and its element count is returned.
+ *   any other value (including both flags combined) -> returns 0 and
+ *       leaves *ccs untouched.
+ *
+ * Each pair is listed in both directions, ordered by the first member.
+ */
+static int
+iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                    const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa1, 0xf1 }, { 0xa2, 0xf2 }, { 0xa3, 0xf3 }, { 0xa4, 0xf4 },
+    { 0xa5, 0xf5 }, { 0xa6, 0xf6 }, { 0xa7, 0xf7 }, { 0xa8, 0xf8 },
+    { 0xa9, 0xf9 }, { 0xaa, 0xfa }, { 0xab, 0xfb }, { 0xac, 0xfc },
+    { 0xae, 0xfe }, { 0xaf, 0xff },
+
+    { 0xb0, 0xd0 }, { 0xb1, 0xd1 }, { 0xb2, 0xd2 }, { 0xb3, 0xd3 },
+    { 0xb4, 0xd4 }, { 0xb5, 0xd5 }, { 0xb6, 0xd6 }, { 0xb7, 0xd7 },
+    { 0xb8, 0xd8 }, { 0xb9, 0xd9 }, { 0xba, 0xda }, { 0xbb, 0xdb },
+    { 0xbc, 0xdc }, { 0xbd, 0xdd },
+    /* 0xbe pairs with 0xde (not 0xdf), matching the lower-case table and
+     * the reverse entry { 0xde, 0xbe } below. */
+    { 0xbe, 0xde },
+    { 0xbf, 0xdf },
+
+    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
+    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
+
+    { 0xd0, 0xb0 }, { 0xd1, 0xb1 }, { 0xd2, 0xb2 }, { 0xd3, 0xb3 },
+    { 0xd4, 0xb4 }, { 0xd5, 0xb5 }, { 0xd6, 0xb6 }, { 0xd7, 0xb7 },
+    { 0xd8, 0xb8 }, { 0xd9, 0xb9 }, { 0xda, 0xba }, { 0xdb, 0xbb },
+    { 0xdc, 0xbc }, { 0xdd, 0xbd }, { 0xde, 0xbe }, { 0xdf, 0xbf },
+
+    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
+    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
+
+    { 0xf1, 0xa1 }, { 0xf2, 0xa2 }, { 0xf3, 0xa3 }, { 0xf4, 0xa4 },
+    { 0xf5, 0xa5 }, { 0xf6, 0xa6 }, { 0xf7, 0xa7 }, { 0xf8, 0xa8 },
+    { 0xf9, 0xa9 }, { 0xfa, 0xaa }, { 0xfb, 0xab }, { 0xfc, 0xac },
+    { 0xfe, 0xae }, { 0xff, 0xaf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for ISO-8859-5.  Every character is exactly one byte
+ * (max and min length are both 1) and the descriptor advertises both the
+ * ASCII and the non-ASCII case ambiguity flags.
+ */
+OnigEncodingType OnigEncodingISO_8859_5 = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-5",                                  /* name */
+  1,                                             /* max enc length */
+  1,                                             /* min enc length */
+  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE),
+  {
+    (OnigCodePoint )'\\',                        /* esc */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anychar '.' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* anytime '*' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* zero or one time '?' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR,  /* one or more time '+' */
+    (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR   /* anychar anytime */
+  },
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  iso_8859_5_mbc_to_normalize,
+  iso_8859_5_is_mbc_ambiguous,
+  iso_8859_5_get_all_pair_ambig_codes,
+  onigenc_nothing_get_all_comp_ambig_codes,
+  iso_8859_5_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c
new file mode 100644
index 0000000..fffcd0e
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_6.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_6.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
+  ((EncISO_8859_6_CtypeTable[(code)] & (ctype)) != 0) /* 1 iff the table entry for byte `code` shares a bit with `ctype`; args parenthesized so a mask like A|B is applied whole */
+
+static const unsigned short EncISO_8859_6_CtypeTable[256] = { /* one class bit mask per byte value, 8 entries per row; tested via ENC_IS_ISO_8859_6_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80-0x87 */
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, /* 0xa0-0xa7 */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+static int
+iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Class test for one code point: look the byte up in the ISO-8859-6
+   * ctype table and check it against the `ctype` mask.  Code points
+   * of 256 and above have no table entry and are never a member. */
+  return (code < 256) ? ENC_IS_ISO_8859_6_CTYPE(code, ctype) : FALSE;
+}
+
+OnigEncodingType OnigEncodingISO_8859_6 = { /* encoding descriptor for ISO-8859-6 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-6", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), /* only the ASCII ambiguity flag is set */
+ { /* meta characters: only the escape character is given a value */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize, /* generic onigenc_ascii_* handlers; no charset-specific case tables are defined for this encoding */
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_6_is_code_ctype, /* the only handler backed by a table specific to this charset */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c
new file mode 100644
index 0000000..e87661d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_7.c
@@ -0,0 +1,278 @@
+/**********************************************************************
+ iso8859_7.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_7_TO_LOWER_CASE(c)  (EncISO_8859_7_ToLowerCaseTable[(c)]) /* lower-case table entry for byte c */
+#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
+  ((EncISO_8859_7_CtypeTable[(code)] & (ctype)) != 0) /* 1 iff the table entry for byte `code` shares a bit with `ctype`; args parenthesized so a mask like A|B is applied whole */
+
+static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { /* byte -> lower-case byte, 8 entries per row; bytes without a lower-case form map to themselves */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x40-0x47: 'A'..'G' fold to 'a'..'g' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', /* 0xb0-0xb7: 0xb6 folds to 0xdc */
+ '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376',
+ '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xc0-0xc7: 0xc0 maps to itself */
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', /* 0xd0-0xd7: 0xd2 maps to itself */
+ '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_7_CtypeTable[256] = { /* one class bit mask per byte value, 8 entries per row; tested via ENC_IS_ISO_8859_7_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80-0x87 */
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, /* 0xa0-0xa7 */
+ 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2,
+ 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, /* 0xc0-0xc7 */
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, /* 0xe0-0xe7 */
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000
+};
+
+static int
+iso_8859_7_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Store the normalized form of the byte at *pp in *lower and step *pp
+   * past it.  The lower-case table is applied only when `flag` enables
+   * the class (ASCII or non-ASCII) the byte belongs to; otherwise the
+   * byte is copied unchanged.  `end` is not consulted. */
+  const UChar* src = *pp;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+                                 ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                                 : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_ISO_8859_7_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+
+  return 1; /* byte length of the character stored in *lower */
+}
+
+static int
+iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  /* Report whether the byte at *pp has another-case form under `flag`.
+   * *pp is advanced by one byte on every path; `end` is not consulted. */
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_7_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'; the former '|' was true for every
+     * byte, so digits and punctuation were reported as ambiguous.
+     * 0xc0 and 0xe0 are lower-case letters with no upper-case form. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0)
+      return (*p == 0xc0 || *p == 0xe0) ? FALSE : TRUE;
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+static int
+iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Class test for one code point: look the byte up in the ISO-8859-7
+   * ctype table and check it against the `ctype` mask.  Code points
+   * of 256 and above have no table entry and are never a member. */
+  return (code < 256) ? ENC_IS_ISO_8859_7_CTYPE(code, ctype) : FALSE;
+}
+
+static int
+iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
+ const OnigPairAmbigCodes** ccs)
+{ /* Points *ccs at the pair table selected by `flag` and returns its entry count (0 if no table applies). */
+ static const OnigPairAmbigCodes cc[] = { /* non-ASCII pairs; each pair appears once in each direction */
+ { 0xb6, 0xdc },
+ { 0xb8, 0xdd },
+ { 0xb9, 0xde },
+ { 0xba, 0xdf },
+ { 0xbc, 0xfc },
+ { 0xbe, 0xfd },
+ { 0xbf, 0xfe },
+
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 }, /* NOTE(review): the lower-case and ctype tables in this file treat 0xd2 as caseless; confirm this pair (and its reverse below) is intended */
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xb6 },
+ { 0xdd, 0xb8 },
+ { 0xde, 0xb9 },
+ { 0xdf, 0xba },
+
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xbc },
+ { 0xfd, 0xbe },
+ { 0xfe, 0xbf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52; /* NOTE(review): presumably the entry count of OnigAsciiPairAmbigCodes (26 letters, both directions); confirm against its definition */
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes); /* number of rows in cc */
+ }
+ else
+ return 0; /* any other flag value, including both bits together: *ccs is left untouched */
+}
+
+OnigEncodingType OnigEncodingISO_8859_7 = { /* encoding descriptor for ISO-8859-7 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-7", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* both ASCII and non-ASCII ambiguity flags set */
+ { /* meta characters: only the escape character is given a value */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_7_mbc_to_normalize, /* iso_8859_7_* handlers are the static functions defined earlier in this file */
+ iso_8859_7_is_mbc_ambiguous,
+ iso_8859_7_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_7_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c
new file mode 100644
index 0000000..e76966c
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_8.c
@@ -0,0 +1,105 @@
+/**********************************************************************
+ iso8859_8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
+  ((EncISO_8859_8_CtypeTable[(code)] & (ctype)) != 0) /* 1 iff the table entry for byte `code` shares a bit with `ctype`; args parenthesized so a mask like A|B is applied whole */
+
+static const unsigned short EncISO_8859_8_CtypeTable[256] = { /* one class bit mask per byte value, 8 entries per row; tested via ENC_IS_ISO_8859_8_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80-0x87 */
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, /* 0xa0-0xa7 */
+ 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, /* 0xe0-0xe7 */
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+static int
+iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Class test for one code point: look the byte up in the ISO-8859-8
+   * ctype table and check it against the `ctype` mask.  Code points
+   * of 256 and above have no table entry and are never a member. */
+  return (code < 256) ? ENC_IS_ISO_8859_8_CTYPE(code, ctype) : FALSE;
+}
+
+OnigEncodingType OnigEncodingISO_8859_8 = { /* encoding descriptor for ISO-8859-8 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-8", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ), /* only the ASCII ambiguity flag is set */
+ { /* meta characters: only the escape character is given a value */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_to_normalize, /* generic onigenc_ascii_* handlers; no charset-specific case tables are defined for this encoding */
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_8_is_code_ctype, /* the only handler backed by a table specific to this charset */
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c
new file mode 100644
index 0000000..16a30c5
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/iso8859_9.c
@@ -0,0 +1,270 @@
+/**********************************************************************
+ iso8859_9.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_9_TO_LOWER_CASE(c)  (EncISO_8859_9_ToLowerCaseTable[(c)]) /* lower-case table entry for byte c */
+#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
+  ((EncISO_8859_9_CtypeTable[(code)] & (ctype)) != 0) /* 1 iff the table entry for byte `code` shares a bit with `ctype`; args parenthesized so a mask like A|B is applied whole */
+
+static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { /* byte -> lower-case byte, 8 entries per row; bytes without a lower-case form map to themselves */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x40-0x47: 'A'..'G' fold to 'a'..'g' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xc0-0xc7 fold to 0xe0-0xe7 */
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', /* 0xd0-0xd7: 0xd7 maps to itself */
+ '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', /* 0xd8-0xdf: 0xdd and 0xdf map to themselves */
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+static const unsigned short EncISO_8859_9_CtypeTable[256] = { /* one class bit mask per byte value, 8 entries per row; tested via ENC_IS_ISO_8859_9_CTYPE */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80-0x87 */
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, /* 0xa0-0xa7 */
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, /* 0xc0-0xc7 */
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, /* 0xe0-0xe7 */
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+static int
+iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end, UChar* lower)
+{
+  /* Store the normalized form of the byte at *pp in *lower and step *pp
+   * past it.  The lower-case table is applied only when `flag` enables
+   * the class (ASCII or non-ASCII) the byte belongs to; otherwise the
+   * byte is copied unchanged.  `end` is not consulted. */
+  const UChar* src = *pp;
+  const OnigAmbigType wanted = ONIGENC_IS_MBC_ASCII(src)
+                                 ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                                 : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+
+  *lower = ((flag & wanted) != 0) ? ENC_ISO_8859_9_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+
+  return 1; /* byte length of the character stored in *lower */
+}
+
+static int
+iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
+                            const UChar** pp, const UChar* end)
+{
+  /* Report whether the byte at *pp has another-case form under `flag`.
+   * *pp is advanced by one byte on every path; `end` is not consulted. */
+  const UChar* p = *pp;
+
+  (*pp)++;
+  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+       ONIGENC_IS_MBC_ASCII(p)) ||
+      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+       !ONIGENC_IS_MBC_ASCII(p))) {
+    int v = (EncISO_8859_9_CtypeTable[*p] &
+             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+    /* Test the LOWER bit with '&'.  The former '|' was true for every
+     * byte, so digits and punctuation were reported as ambiguous. */
+    if ((v & ONIGENC_CTYPE_LOWER) != 0) {
+      /* 0xdf etc.. are lower case letter, but can't convert. */
+      return (*p == 0xdf || (*p >= 0xaa && *p <= 0xba)) ? FALSE : TRUE;
+    }
+    return (v != 0 ? TRUE : FALSE);
+  }
+  return FALSE;
+}
+
+static int
+iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  /* Class test for one code point: look the byte up in the ISO-8859-9
+   * ctype table and check it against the `ctype` mask.  Code points
+   * of 256 and above have no table entry and are never a member. */
+  return (code < 256) ? ENC_IS_ISO_8859_9_CTYPE(code, ctype) : FALSE;
+}
+
+static int
+iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
+ const OnigPairAmbigCodes** ccs)
+{ /* Points *ccs at the pair table selected by `flag` and returns its entry count (0 if no table applies). */
+ static const OnigPairAmbigCodes cc[] = { /* non-ASCII pairs; each pair appears once in each direction */
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 }, /* 0xd7 / 0xf7 have no entry */
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd }, /* NOTE(review): the lower-case table in this file maps 0xdd to itself; confirm this pair (and its reverse below) is intended */
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52; /* NOTE(review): presumably the entry count of OnigAsciiPairAmbigCodes (26 letters, both directions); confirm against its definition */
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes); /* number of rows in cc */
+ }
+ else
+ return 0; /* any other flag value, including both bits together: *ccs is left untouched */
+}
+
+OnigEncodingType OnigEncodingISO_8859_9 = { /* encoding descriptor for ISO-8859-9 */
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-9", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* both ASCII and non-ASCII ambiguity flags set */
+ { /* meta characters: only the escape character is given a value */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ iso_8859_9_mbc_to_normalize, /* iso_8859_9_* handlers are the static functions defined earlier in this file */
+ iso_8859_9_is_mbc_ambiguous,
+ iso_8859_9_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes, /* NOTE(review): presumably supplies the compound mapping for 0xdf (sharp s); confirm where it is defined */
+ iso_8859_9_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c
new file mode 100644
index 0000000..d7277e8
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/koi8.c
@@ -0,0 +1,264 @@
+/**********************************************************************
+ koi8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte c, taken from EncKOI8_ToLowerCaseTable. */
+#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[(c)]
+/*
+ * Evaluates to 1 when any bit of ctype is set in the class mask of code,
+ * otherwise 0.  Both arguments are parenthesized so that a composite
+ * argument such as (A | B) is not torn apart by operator precedence.
+ */
+#define ENC_IS_KOI8_CTYPE(code,ctype) \
+  ((EncKOI8_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Byte-to-lower-case folding table, indexed by the byte value.
+ * ASCII 'A'-'Z' (0101-0132) map to 'a'-'z' (0141-0172) and bytes
+ * 0340-0377 map to 0300-0337; every other byte maps to itself.
+ */
+static const UChar EncKOI8_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
+};
+
+/*
+ * Per-byte character-class bit mask, indexed by the byte value and tested
+ * against ONIGENC_CTYPE_* bits (see ENC_IS_KOI8_CTYPE and
+ * koi8_is_mbc_ambiguous).
+ * NOTE(review): presumably generated by enc/mktable.c -- confirm before
+ * editing entries by hand.
+ */
+static const unsigned short EncKOI8_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+};
+
+/*
+ * Store the case-normalized form of the byte at *pp in *lower and advance
+ * *pp by one byte.  The byte is folded through the KOI8 lower-case table
+ * only when the flag bit matching its kind (ASCII or non-ASCII) is set;
+ * otherwise it is copied unchanged.  The end argument is not consulted.
+ *
+ * Returns 1, the byte length of the normalized character.
+ */
+static int
+koi8_mbc_to_normalize(OnigAmbigType flag,
+                      const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
+{
+  const OnigUChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this kind of byte. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_KOI8_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Report whether the byte at *pp is case-ambiguous under flag, and advance
+ * *pp by one byte in every case.  A byte is ambiguous when the flag bit for
+ * its kind (ASCII or non-ASCII) is set and the ctype table marks it as an
+ * upper-case or lower-case letter.  The end argument is not consulted.
+ *
+ * Returns TRUE or FALSE.
+ */
+static int
+koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
+{
+  const OnigUChar* src = *pp;
+  OnigAmbigType wanted;
+
+  *pp = src + 1;
+
+  wanted = ONIGENC_IS_MBC_ASCII(src) ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                                     : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+  if ((flag & wanted) == 0)
+    return FALSE;
+
+  if ((EncKOI8_CtypeTable[*src] &
+       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0)
+    return TRUE;
+  return FALSE;
+}
+
+
+/*
+ * Test whether code point code belongs to any of the classes in ctype.
+ * Code points of 256 and above are outside the table and yield FALSE.
+ */
+static int
+koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_KOI8_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case-ambiguous code pairs selected by flag.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE     -> shared ASCII table
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE  -> KOI8 table below
+ *   anything else (including both bits together)   -> no table
+ *
+ * On success *ccs points at the table and the number of entries is
+ * returned; otherwise 0 is returned and *ccs is left untouched.
+ *
+ * The non-ASCII table lists every lower/upper pair 0xc0-0xdf <-> 0xe0-0xff
+ * in both directions.  The reverse entry { 0xfd, 0xdd } was previously
+ * missing even though the forward entry { 0xdd, 0xfd } and the case-folding
+ * table both include that letter; it is now present.
+ */
+static int
+koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
+                              const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+    { 0xdf, 0xff },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde },
+    { 0xff, 0xdf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    /* presumably 26 ASCII letters in both directions -- defined elsewhere */
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for KOI8.  Every character is exactly one byte (min
+ * and max encoded length are both 1), and both the ASCII and the non-ASCII
+ * case-ambiguity flags are enabled.  Only the escape meta character is set;
+ * the remaining meta characters are marked ineffective.
+ */
+OnigEncodingType OnigEncodingKOI8 = {
+ onigenc_single_byte_mbc_enc_len,
+ "KOI8", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ koi8_mbc_to_normalize,
+ koi8_is_mbc_ambiguous,
+ koi8_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ koi8_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c
new file mode 100644
index 0000000..1010f5f
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/koi8_r.c
@@ -0,0 +1,266 @@
+/**********************************************************************
+ koi8_r.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Lower-case counterpart of byte c, taken from EncKOI8_R_ToLowerCaseTable. */
+#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[(c)]
+/*
+ * Evaluates to 1 when any bit of ctype is set in the class mask of code,
+ * otherwise 0.  Both arguments are parenthesized so that a composite
+ * argument such as (A | B) is not torn apart by operator precedence.
+ */
+#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
+  ((EncKOI8_R_CtypeTable[(code)] & (ctype)) != 0)
+
+/*
+ * Byte-to-lower-case folding table, indexed by the byte value.
+ * ASCII 'A'-'Z' (0101-0132) map to 'a'-'z' (0141-0172), byte 0263 maps to
+ * 0243, and bytes 0340-0377 map to 0300-0337; every other byte maps to
+ * itself.
+ */
+static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
+};
+
+/*
+ * Per-byte character-class bit mask, indexed by the byte value and tested
+ * against ONIGENC_CTYPE_* bits (see ENC_IS_KOI8_R_CTYPE and
+ * koi8_r_is_mbc_ambiguous).
+ * NOTE(review): presumably generated by enc/mktable.c -- confirm before
+ * editing entries by hand.
+ */
+static const unsigned short EncKOI8_R_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+};
+
+/*
+ * Store the case-normalized form of the byte at *pp in *lower and advance
+ * *pp by one byte.  The byte is folded through the KOI8-R lower-case table
+ * only when the flag bit matching its kind (ASCII or non-ASCII) is set;
+ * otherwise it is copied unchanged.  The end argument is not consulted.
+ *
+ * Returns 1, the byte length of the normalized character.
+ */
+static int
+koi8_r_mbc_to_normalize(OnigAmbigType flag,
+                        const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold;
+
+  /* Pick the flag bit that governs this kind of byte. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  *lower = fold ? ENC_KOI8_R_TO_LOWER_CASE(*src) : *src;
+  *pp = src + 1;
+  return 1;
+}
+
+/*
+ * Report whether the byte at *pp is case-ambiguous under flag, and advance
+ * *pp by one byte in every case.  A byte is ambiguous when the flag bit for
+ * its kind (ASCII or non-ASCII) is set and the ctype table marks it as an
+ * upper-case or lower-case letter.  The end argument is not consulted.
+ *
+ * Returns TRUE or FALSE.
+ */
+static int
+koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* src = *pp;
+  OnigAmbigType wanted;
+
+  *pp = src + 1;
+
+  wanted = ONIGENC_IS_MBC_ASCII(src) ? ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
+                                     : ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE;
+  if ((flag & wanted) == 0)
+    return FALSE;
+
+  if ((EncKOI8_R_CtypeTable[*src] &
+       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) != 0)
+    return TRUE;
+  return FALSE;
+}
+
+/*
+ * Test whether code point code belongs to any of the classes in ctype.
+ * Code points of 256 and above are outside the table and yield FALSE.
+ */
+static int
+koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code >= 256)
+    return FALSE;
+
+  return ENC_IS_KOI8_R_CTYPE(code, ctype);
+}
+
+/*
+ * Hand out the table of case-ambiguous code pairs selected by flag.
+ *
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE     -> shared ASCII table
+ *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE  -> KOI8-R table below
+ *   anything else (including both bits together)   -> no table
+ *
+ * On success *ccs points at the table and the number of entries is
+ * returned; otherwise 0 is returned and *ccs is left untouched.
+ *
+ * The non-ASCII table lists 0xa3 <-> 0xb3 and every lower/upper pair
+ * 0xc0-0xdf <-> 0xe0-0xff in both directions.  The reverse entry
+ * { 0xfd, 0xdd } was previously missing even though the forward entry
+ * { 0xdd, 0xfd } and the case-folding table both include that letter; it
+ * is now present.
+ */
+static int
+koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
+                                const OnigPairAmbigCodes** ccs)
+{
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa3, 0xb3 },
+    { 0xb3, 0xa3 },
+
+    { 0xc0, 0xe0 },
+    { 0xc1, 0xe1 },
+    { 0xc2, 0xe2 },
+    { 0xc3, 0xe3 },
+    { 0xc4, 0xe4 },
+    { 0xc5, 0xe5 },
+    { 0xc6, 0xe6 },
+    { 0xc7, 0xe7 },
+    { 0xc8, 0xe8 },
+    { 0xc9, 0xe9 },
+    { 0xca, 0xea },
+    { 0xcb, 0xeb },
+    { 0xcc, 0xec },
+    { 0xcd, 0xed },
+    { 0xce, 0xee },
+    { 0xcf, 0xef },
+
+    { 0xd0, 0xf0 },
+    { 0xd1, 0xf1 },
+    { 0xd2, 0xf2 },
+    { 0xd3, 0xf3 },
+    { 0xd4, 0xf4 },
+    { 0xd5, 0xf5 },
+    { 0xd6, 0xf6 },
+    { 0xd7, 0xf7 },
+    { 0xd8, 0xf8 },
+    { 0xd9, 0xf9 },
+    { 0xda, 0xfa },
+    { 0xdb, 0xfb },
+    { 0xdc, 0xfc },
+    { 0xdd, 0xfd },
+    { 0xde, 0xfe },
+    { 0xdf, 0xff },
+
+    { 0xe0, 0xc0 },
+    { 0xe1, 0xc1 },
+    { 0xe2, 0xc2 },
+    { 0xe3, 0xc3 },
+    { 0xe4, 0xc4 },
+    { 0xe5, 0xc5 },
+    { 0xe6, 0xc6 },
+    { 0xe7, 0xc7 },
+    { 0xe8, 0xc8 },
+    { 0xe9, 0xc9 },
+    { 0xea, 0xca },
+    { 0xeb, 0xcb },
+    { 0xec, 0xcc },
+    { 0xed, 0xcd },
+    { 0xee, 0xce },
+    { 0xef, 0xcf },
+
+    { 0xf0, 0xd0 },
+    { 0xf1, 0xd1 },
+    { 0xf2, 0xd2 },
+    { 0xf3, 0xd3 },
+    { 0xf4, 0xd4 },
+    { 0xf5, 0xd5 },
+    { 0xf6, 0xd6 },
+    { 0xf7, 0xd7 },
+    { 0xf8, 0xd8 },
+    { 0xf9, 0xd9 },
+    { 0xfa, 0xda },
+    { 0xfb, 0xdb },
+    { 0xfc, 0xdc },
+    { 0xfd, 0xdd },
+    { 0xfe, 0xde },
+    { 0xff, 0xdf }
+  };
+
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+    *ccs = OnigAsciiPairAmbigCodes;
+    /* presumably 26 ASCII letters in both directions -- defined elsewhere */
+    return 52;
+  }
+  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+    *ccs = cc;
+    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  }
+  else
+    return 0;
+}
+
+/*
+ * Encoding descriptor for KOI8-R.  Every character is exactly one byte (min
+ * and max encoded length are both 1), and both the ASCII and the non-ASCII
+ * case-ambiguity flags are enabled.  Only the escape meta character is set;
+ * the remaining meta characters are marked ineffective.
+ */
+OnigEncodingType OnigEncodingKOI8_R = {
+ onigenc_single_byte_mbc_enc_len,
+ "KOI8-R", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ koi8_r_mbc_to_normalize,
+ koi8_r_is_mbc_ambiguous,
+ koi8_r_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ koi8_r_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c
new file mode 100644
index 0000000..fcf0574
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/mktable.c
@@ -0,0 +1,1115 @@
+/**********************************************************************
+ mktable.c
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NOT_RUBY
+#include "regenc.h"
+
+/*
+ * Encoding identifiers passed as the enc argument of the Is*() predicates.
+ * The values are consecutive from 0 and follow the order of the entries in
+ * Info[]; note that ISO_8859_13 is 12 because no id is defined for an
+ * ISO-8859-12 encoding.
+ */
+#define UNICODE_ISO_8859_1 0
+#define ISO_8859_1 1
+#define ISO_8859_2 2
+#define ISO_8859_3 3
+#define ISO_8859_4 4
+#define ISO_8859_5 5
+#define ISO_8859_6 6
+#define ISO_8859_7 7
+#define ISO_8859_8 8
+#define ISO_8859_9 9
+#define ISO_8859_10 10
+#define ISO_8859_11 11
+#define ISO_8859_13 12
+#define ISO_8859_14 13
+#define ISO_8859_15 14
+#define ISO_8859_16 15
+#define KOI8 16
+#define KOI8_R 17
+
+/* Associates an encoding identifier (num) with its textual name. */
+typedef struct {
+ int num;
+ char* name;
+} ENC_INFO;
+
+/*
+ * One entry per supported encoding: the identifier macro and the same
+ * identifier spelled out as a string.  Entries are listed in increasing
+ * identifier order.
+ */
+static ENC_INFO Info[] = {
+ { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
+ { ISO_8859_1, "ISO_8859_1" },
+ { ISO_8859_2, "ISO_8859_2" },
+ { ISO_8859_3, "ISO_8859_3" },
+ { ISO_8859_4, "ISO_8859_4" },
+ { ISO_8859_5, "ISO_8859_5" },
+ { ISO_8859_6, "ISO_8859_6" },
+ { ISO_8859_7, "ISO_8859_7" },
+ { ISO_8859_8, "ISO_8859_8" },
+ { ISO_8859_9, "ISO_8859_9" },
+ { ISO_8859_10, "ISO_8859_10" },
+ { ISO_8859_11, "ISO_8859_11" },
+ { ISO_8859_13, "ISO_8859_13" },
+ { ISO_8859_14, "ISO_8859_14" },
+ { ISO_8859_15, "ISO_8859_15" },
+ { ISO_8859_16, "ISO_8859_16" },
+ { KOI8, "KOI8" },
+ { KOI8_R, "KOI8_R" }
+};
+
+
+/*
+ * Return 1 when byte value c is an alphabetic character in encoding enc
+ * (one of the encoding identifier macros), otherwise 0.
+ *
+ * ASCII letters are accepted before enc is examined.  For any other c an
+ * unknown enc terminates the process with exit(-1).
+ */
+static int IsAlpha(int enc, int c)
+{
+  if ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a))
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_9:
+    return (c == 0xaa ||
+            c == 0xb5 ||
+            c == 0xba ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff));
+
+  case ISO_8859_2:
+    return ((c == 0xa1 || c == 0xa3) ||
+            (c == 0xa5 || c == 0xa6) ||
+            (c >= 0xa9 && c <= 0xac) ||
+            (c >= 0xae && c <= 0xaf) ||
+            (c == 0xb1 || c == 0xb3) ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            (c >= 0xbe && c <= 0xbf) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_3:
+    return (c == 0xa1 ||
+            c == 0xa6 ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xaf ||
+            c == 0xb1 ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            c == 0xbf ||
+            (c >= 0xc0 && c <= 0xc2) ||
+            (c >= 0xc4 && c <= 0xcf) ||
+            (c >= 0xd1 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xe2) ||
+            (c >= 0xe4 && c <= 0xef) ||
+            (c >= 0xf1 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_4:
+    return ((c >= 0xa1 && c <= 0xa3) ||
+            (c == 0xa5 || c == 0xa6) ||
+            (c >= 0xa9 && c <= 0xac) ||
+            c == 0xae ||
+            (c == 0xb1 || c == 0xb3) ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbf) ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_5:
+    return ((c >= 0xa1 && c <= 0xcf && c != 0xad) ||
+            (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd));
+
+  case ISO_8859_6:
+    return ((c >= 0xc1 && c <= 0xda) ||
+            (c >= 0xe0 && c <= 0xf2));
+
+  case ISO_8859_7:
+    return (c == 0xb6 ||
+            (c >= 0xb8 && c <= 0xba) ||
+            c == 0xbc ||
+            (c >= 0xbe && c <= 0xbf) ||
+            c == 0xc0 ||
+            (c >= 0xc1 && c <= 0xdb && c != 0xd2) ||
+            (c >= 0xdc && c <= 0xfe));
+
+  case ISO_8859_8:
+    return (c == 0xb5 ||
+            (c >= 0xe0 && c <= 0xfa));
+
+  case ISO_8859_10:
+    return ((c >= 0xa1 && c <= 0xa6) ||
+            (c >= 0xa8 && c <= 0xac) ||
+            (c == 0xae || c == 0xaf) ||
+            (c >= 0xb1 && c <= 0xb6) ||
+            (c >= 0xb8 && c <= 0xbc) ||
+            (c >= 0xbe && c <= 0xff));
+
+  case ISO_8859_11:
+    return ((c >= 0xa1 && c <= 0xda) ||
+            (c >= 0xdf && c <= 0xfb));
+
+  case ISO_8859_13:
+    return (c == 0xa8 ||
+            c == 0xaa ||
+            c == 0xaf ||
+            c == 0xb5 ||
+            c == 0xb8 ||
+            c == 0xba ||
+            (c >= 0xbf && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_14:
+    return ((c == 0xa1 || c == 0xa2) ||
+            (c == 0xa4 || c == 0xa5) ||
+            (c == 0xa6 || c == 0xa8) ||
+            (c >= 0xaa && c <= 0xac) ||
+            (c >= 0xaf && c <= 0xb5) ||
+            (c >= 0xb7 && c <= 0xff));
+
+  case ISO_8859_15:
+    return (c == 0xaa ||
+            c == 0xb5 ||
+            c == 0xba ||
+            (c >= 0xc0 && c <= 0xd6) ||
+            (c >= 0xd8 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff) ||
+            c == 0xa6 ||
+            c == 0xa8 ||
+            c == 0xb4 ||
+            c == 0xb8 ||
+            c == 0xbc ||
+            c == 0xbd ||
+            c == 0xbe);
+
+  case ISO_8859_16:
+    return (c == 0xa1 ||
+            c == 0xa2 ||
+            c == 0xa3 ||
+            c == 0xa6 ||
+            c == 0xa8 ||
+            c == 0xaa ||
+            c == 0xac ||
+            c == 0xae ||
+            c == 0xaf ||
+            c == 0xb2 ||
+            c == 0xb3 ||
+            c == 0xb4 ||
+            (c >= 0xb8 && c <= 0xba) ||
+            c == 0xbc ||
+            c == 0xbd ||
+            c == 0xbe ||
+            c == 0xbf ||
+            (c >= 0xc0 && c <= 0xde) ||
+            (c >= 0xdf && c <= 0xff));
+
+  case KOI8_R:
+    if (c == 0xa3 || c == 0xb3) return 1;
+    /* fall through: KOI8-R shares the KOI8 letter range */
+  case KOI8:
+    return (c >= 0xc0 && c <= 0xff);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/*
+ * Return 1 when byte value c is a blank character in encoding enc,
+ * otherwise 0.  Tab (0x09) and space (0x20) are blank regardless of enc;
+ * in addition 0x9a is blank for KOI8_R and 0xa0 for every other known
+ * encoding.  For any other c an unknown enc terminates the process with
+ * exit(-1).
+ */
+static int IsBlank(int enc, int c)
+{
+  if (c == 0x09 || c == 0x20)
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_3:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_6:
+  case ISO_8859_7:
+  case ISO_8859_8:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_11:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+  case KOI8:
+    return (c == 0xa0);
+
+  case KOI8_R:
+    return (c == 0x9a);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/*
+ * Return 1 when byte value c is a control character in encoding enc,
+ * otherwise 0.  0x00-0x1f are control characters regardless of enc.
+ * KOI8_R adds only 0x7f; every other known encoding adds 0x7f-0x9f, and
+ * UNICODE_ISO_8859_1 additionally adds 0xad.  For any other c an unknown
+ * enc terminates the process with exit(-1).
+ */
+static int IsCntrl(int enc, int c)
+{
+  if (c >= 0x00 && c <= 0x1F)
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+    if (c == 0xad) return 1;
+    /* fall through to the range shared with the ISO-8859 family */
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_3:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_6:
+  case ISO_8859_7:
+  case ISO_8859_8:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_11:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+  case KOI8:
+    return (c >= 0x7f && c <= 0x9F);
+
+  case KOI8_R:
+    return (c == 0x7f);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/*
+ * Return 1 when byte value c is one of 0x30-0x39, otherwise 0.
+ * The enc argument is accepted for symmetry with the other predicates but
+ * does not influence the result.
+ */
+static int IsDigit(int enc, int c)
+{
+  return (0x30 <= c && c <= 0x39) ? 1 : 0;
+}
+
+/*
+ * Return 1 when byte value c is a graphic (visible) character in encoding
+ * enc, otherwise 0.  0x21-0x7e are graphic regardless of enc; the upper
+ * half is decided per encoding.  For any other c an unknown enc terminates
+ * the process with exit(-1).
+ */
+static int IsGraph(int enc, int c)
+{
+  if (c >= 0x21 && c <= 0x7e)
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_2:
+  case ISO_8859_4:
+  case ISO_8859_5:
+  case ISO_8859_9:
+  case ISO_8859_10:
+  case ISO_8859_13:
+  case ISO_8859_14:
+  case ISO_8859_15:
+  case ISO_8859_16:
+    return (c >= 0xa1 && c <= 0xff);
+
+  case ISO_8859_3:
+    /* everything from 0xa1 up except the listed code points */
+    return (c >= 0xa1 &&
+            !(c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
+              c == 0xe3 || c == 0xf0));
+
+  case ISO_8859_6:
+    return ((c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) ||
+            (c >= 0xc1 && c <= 0xda) ||
+            (c >= 0xe0 && c <= 0xf2));
+
+  case ISO_8859_7:
+    return (c >= 0xa1 && c <= 0xfe &&
+            c != 0xa4 && c != 0xa5 && c != 0xaa &&
+            c != 0xae && c != 0xd2);
+
+  case ISO_8859_8:
+    /* 0xa2-0xfa with the 0xbf-0xde gap removed */
+    return (c >= 0xa2 && c <= 0xfa &&
+            !(c >= 0xbf && c <= 0xde));
+
+  case ISO_8859_11:
+    return ((c >= 0xa1 && c <= 0xda) ||
+            (c >= 0xdf && c <= 0xfb));
+
+  case KOI8:
+    return (c >= 0xc0 && c <= 0xff);
+
+  case KOI8_R:
+    return (c >= 0x80 && c <= 0xff && c != 0x9a);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+/*
+ * Return 1 when byte value c is a lower-case letter in encoding enc,
+ * otherwise 0.  ASCII 'a'-'z' (0x61-0x7a) are lower case regardless of
+ * enc; ISO_8859_6 and ISO_8859_11 add nothing beyond that.  For any other
+ * c an unknown enc terminates the process with exit(-1).
+ */
+static int IsLower(int enc, int c)
+{
+  if (c >= 0x61 && c <= 0x7a)
+    return 1;
+
+  switch (enc) {
+  case UNICODE_ISO_8859_1:
+  case ISO_8859_1:
+  case ISO_8859_9:
+    return (c == 0xaa ||
+            c == 0xb5 ||
+            c == 0xba ||
+            (c >= 0xdf && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff));
+
+  case ISO_8859_2:
+    return ((c == 0xb1 || c == 0xb3) ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            (c >= 0xbe && c <= 0xbf) ||
+            (c >= 0xdf && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_3:
+    return (c == 0xb1 ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            c == 0xbf ||
+            c == 0xdf ||
+            (c >= 0xe0 && c <= 0xe2) ||
+            (c >= 0xe4 && c <= 0xef) ||
+            (c >= 0xf1 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_4:
+    return (c == 0xa2 ||
+            (c == 0xb1 || c == 0xb3) ||
+            (c == 0xb5 || c == 0xb6) ||
+            (c >= 0xb9 && c <= 0xbc) ||
+            (c >= 0xbe && c <= 0xbf) ||
+            c == 0xdf ||
+            (c >= 0xe0 && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_5:
+    return (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd);
+
+  case ISO_8859_6:
+    return 0;
+
+  case ISO_8859_7:
+    return (c == 0xc0 ||
+            (c >= 0xdc && c <= 0xfe));
+
+  case ISO_8859_8:
+    return (c == 0xb5);
+
+  case ISO_8859_10:
+    return ((c >= 0xb1 && c <= 0xb6) ||
+            (c >= 0xb8 && c <= 0xbc) ||
+            (c == 0xbe || c == 0xbf) ||
+            (c >= 0xdf && c <= 0xff));
+
+  case ISO_8859_11:
+    return 0;
+
+  case ISO_8859_13:
+    return (c == 0xb5 ||
+            c == 0xb8 ||
+            c == 0xba ||
+            c == 0xbf ||
+            (c >= 0xdf && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xfe));
+
+  case ISO_8859_14:
+    return (c == 0xa2 ||
+            c == 0xa5 ||
+            c == 0xab ||
+            (c == 0xb1 || c == 0xb3 || c == 0xb5) ||
+            (c >= 0xb8 && c <= 0xba) ||
+            c == 0xbc ||
+            (c == 0xbe || c == 0xbf) ||
+            (c >= 0xdf && c <= 0xff));
+
+  case ISO_8859_15:
+    return (c == 0xaa ||
+            c == 0xb5 ||
+            c == 0xba ||
+            (c >= 0xdf && c <= 0xf6) ||
+            (c >= 0xf8 && c <= 0xff) ||
+            c == 0xa8 ||
+            c == 0xb8 ||
+            c == 0xbd);
+
+  case ISO_8859_16:
+    return (c == 0xa2 ||
+            c == 0xa8 ||
+            c == 0xae ||
+            c == 0xb3 ||
+            (c >= 0xb8 && c <= 0xba) ||
+            c == 0xbd ||
+            c == 0xbf ||
+            (c >= 0xdf && c <= 0xff));
+
+  case KOI8_R:
+    if (c == 0xa3) return 1;
+    /* fall through: KOI8-R shares the KOI8 lower-case range */
+  case KOI8:
+    return (c >= 0xc0 && c <= 0xdf);
+
+  default:
+    exit(-1);
+  }
+
+  return 0;
+}
+
+static int IsPrint(int enc, int c)  /* 1 if byte c is classified as printable in encoding enc, otherwise 0 */
+{
+ if (c >= 0x20 && c <= 0x7e) return 1;  /* ASCII space through '~' for every encoding */
+
+ switch (enc) {
+ case UNICODE_ISO_8859_1:
+ if (c >= 0x09 && c <= 0x0d) return 1;  /* only this encoding also marks 0x09..0x0d as printable */
+ if (c == 0x85) return 1;
+ /* fall through: the 0xa0..0xff rule below applies as well */
+ case ISO_8859_1:
+ case ISO_8859_2:
+ case ISO_8859_4:
+ case ISO_8859_5:
+ case ISO_8859_9:
+ case ISO_8859_10:
+ case ISO_8859_13:
+ case ISO_8859_14:
+ case ISO_8859_15:
+ case ISO_8859_16:
+ if (c >= 0xa0 && c <= 0xff) return 1;  /* the whole 0xa0..0xff block */
+ break;
+
+ case ISO_8859_3:
+ if (c >= 0xa0) {  /* everything from 0xa0 up except the seven bytes listed next */
+ if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
+ c == 0xe3 || c == 0xf0)
+ return 0;
+ else
+ return 1;
+ }
+ break;
+
+ case ISO_8859_6:
+ if (c == 0xa0) return 1;
+ if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
+ return 1;
+ if (c >= 0xc1 && c <= 0xda) return 1;
+ if (c >= 0xe0 && c <= 0xf2) return 1;
+ break;
+
+ case ISO_8859_7:
+ if (c >= 0xa0 && c <= 0xfe &&  /* 0xa0..0xfe minus the five bytes excluded below */
+ c != 0xa4 && c != 0xa5 && c != 0xaa &&
+ c != 0xae && c != 0xd2) return 1;
+ break;
+
+ case ISO_8859_8:
+ if (c >= 0xa0 && c <= 0xfa) {
+ if (c >= 0xbf && c <= 0xde) return 0;  /* excluded run inside 0xa0..0xfa */
+ if (c == 0xa1) return 0;
+ return 1;
+ }
+ break;
+
+ case ISO_8859_11:
+ if (c >= 0xa0 && c <= 0xda) return 1;
+ if (c >= 0xdf && c <= 0xfb) return 1;
+ break;
+
+ case KOI8:
+ if (c == 0xa0) return 1;
+ if (c >= 0xc0 && c <= 0xff) return 1;
+ break;
+
+ case KOI8_R:
+ if (c >= 0x80 && c <= 0xff) return 1;  /* the entire upper half */
+ break;
+
+ default:  /* unrecognised enc value */
+ exit(-1);  /* terminates the whole program instead of returning an error */
+ }
+
+ return 0;  /* no rule matched: not printable */
+}
+
+static int IsPunct(int enc, int c)  /* 1 if byte c is classified as punctuation in encoding enc, otherwise 0 */
+{
+ if (enc == UNICODE_ISO_8859_1) {  /* NOTE(review): every byte tested in this block is also inside the ASCII ranges below, so the result is the same without it */
+ if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
+ c == 0x7c || c == 0x7e) return 1;
+ if (c >= 0x3c && c <= 0x3e) return 1;
+ }
+  /* ASCII ranges applied to every encoding */
+ if (c >= 0x21 && c <= 0x2f) return 1;
+ if (c >= 0x3a && c <= 0x40) return 1;
+ if (c >= 0x5b && c <= 0x60) return 1;
+ if (c >= 0x7b && c <= 0x7e) return 1;
+
+ switch (enc) {
+ case ISO_8859_1:
+ case ISO_8859_9:
+ case ISO_8859_15:
+ if (c == 0xad) return 1;  /* 0xad is tested for these three but not for UNICODE_ISO_8859_1 */
+ /* fall through: the remaining bytes are shared with UNICODE_ISO_8859_1 */
+ case UNICODE_ISO_8859_1:
+ if (c == 0xa1) return 1;
+ if (c == 0xab) return 1;
+ if (c == 0xb7) return 1;
+ if (c == 0xbb) return 1;
+ if (c == 0xbf) return 1;
+ break;
+
+ case ISO_8859_2:
+ case ISO_8859_4:
+ case ISO_8859_5:
+ case ISO_8859_14:
+ if (c == 0xad) return 1;
+ break;
+
+ case ISO_8859_3:
+ case ISO_8859_10:
+ if (c == 0xad) return 1;
+ if (c == 0xb7) return 1;
+ if (c == 0xbd) return 1;
+ break;
+
+ case ISO_8859_6:
+ if (c == 0xac) return 1;
+ if (c == 0xad) return 1;
+ if (c == 0xbb) return 1;
+ if (c == 0xbf) return 1;
+ break;
+
+ case ISO_8859_7:
+ if (c == 0xa1 || c == 0xa2) return 1;
+ if (c == 0xab) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xad) return 1;
+ if (c == 0xb7 || c == 0xbb) return 1;
+ break;
+
+ case ISO_8859_8:
+ if (c == 0xab) return 1;
+ if (c == 0xad) return 1;
+ if (c == 0xb7) return 1;
+ if (c == 0xbb) return 1;
+ if (c == 0xdf) return 1;
+ break;
+
+ case ISO_8859_13:
+ if (c == 0xa1 || c == 0xa5) return 1;
+ if (c == 0xab || c == 0xad) return 1;
+ if (c == 0xb4 || c == 0xb7) return 1;
+ if (c == 0xbb) return 1;
+ if (c == 0xff) return 1;
+ break;
+
+ case ISO_8859_16:
+ if (c == 0xa5) return 1;
+ if (c == 0xab) return 1;
+ if (c == 0xad) return 1;
+ if (c == 0xb5) return 1;
+ if (c == 0xb7) return 1;
+ if (c == 0xbb) return 1;
+ break;
+
+ case KOI8_R:
+ if (c == 0x9e) return 1;
+ break;
+
+ case ISO_8859_11:  /* these two encodings add nothing beyond the ASCII ranges */
+ case KOI8:
+ break;
+
+ default:  /* unrecognised enc value */
+ exit(-1);  /* terminates the whole program instead of returning an error */
+ }
+
+ return 0;  /* no rule matched: not punctuation */
+}
+
+static int IsSpace(int enc, int c)  /* 1 if byte c is classified as white space in encoding enc, otherwise 0 */
+{
+ if (c >= 0x09 && c <= 0x0d) return 1;  /* 0x09..0x0d for every encoding */
+ if (c == 0x20) return 1;  /* ASCII space */
+
+ switch (enc) {
+ case UNICODE_ISO_8859_1:
+ if (c == 0x85) return 1;  /* only this encoding treats 0x85 as space */
+ /* fall through: 0xa0 is tested as well */
+ case ISO_8859_1:
+ case ISO_8859_2:
+ case ISO_8859_3:
+ case ISO_8859_4:
+ case ISO_8859_5:
+ case ISO_8859_6:
+ case ISO_8859_7:
+ case ISO_8859_8:
+ case ISO_8859_9:
+ case ISO_8859_10:
+ case ISO_8859_11:
+ case ISO_8859_13:
+ case ISO_8859_14:
+ case ISO_8859_15:
+ case ISO_8859_16:
+ case KOI8:
+ if (c == 0xa0) return 1;  /* the single high-half space byte shared by all labels above */
+ break;
+
+ case KOI8_R:
+ if (c == 0x9a) return 1;  /* KOI8_R uses 0x9a here instead of 0xa0 */
+ break;
+
+ default:  /* unrecognised enc value */
+ exit(-1);  /* terminates the whole program instead of returning an error */
+ }
+
+ return 0;  /* no rule matched: not white space */
+}
+
+static int IsUpper(int enc, int c)  /* 1 if byte c is classified as upper-case in encoding enc, otherwise 0 */
+{
+ if (c >= 0x41 && c <= 0x5a) return 1;  /* ASCII 'A'..'Z' count in every encoding */
+
+ switch (enc) {
+ case UNICODE_ISO_8859_1:
+ case ISO_8859_1:
+ case ISO_8859_9:
+ if (c >= 0xc0 && c <= 0xd6) return 1;  /* 0xd7 is deliberately left out of the run */
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_2:
+ if (c == 0xa1 || c == 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c >= 0xae && c <= 0xaf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_3:
+ if (c == 0xa1) return 1;
+ if (c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xaf) return 1;
+ if (c >= 0xc0 && c <= 0xc2) return 1;
+ if (c >= 0xc4 && c <= 0xcf) return 1;
+ if (c >= 0xd1 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_4:
+ if (c == 0xa1 || c == 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xbd) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_5:
+ if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;  /* 0xa1..0xcf minus 0xad */
+ break;
+
+ case ISO_8859_6:  /* no upper-case bytes beyond ASCII for this encoding */
+ break;
+
+ case ISO_8859_7:
+ if (c == 0xb6) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
+ break;
+
+ case ISO_8859_8:  /* no upper-case bytes beyond ASCII for these two encodings */
+ case ISO_8859_11:
+ break;
+
+ case ISO_8859_10:
+ if (c >= 0xa1 && c <= 0xa6) return 1;
+ if (c >= 0xa8 && c <= 0xac) return 1;
+ if (c == 0xae || c == 0xaf) return 1;
+ if (c >= 0xc0 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_13:
+ if (c == 0xa8) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xaf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_14:
+ if (c == 0xa1) return 1;
+ if (c == 0xa4 || c == 0xa6) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xaa || c == 0xac) return 1;
+ if (c == 0xaf || c == 0xb0) return 1;
+ if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
+ if (c == 0xbb || c == 0xbd) return 1;
+ if (c >= 0xc0 && c <= 0xde) return 1;
+ break;
+
+ case ISO_8859_15:  /* first two tests repeat the ISO_8859_1 ranges above */
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xde) return 1;
+ if (c == 0xa6) return 1;  /* this and the next three bytes are extra compared with ISO_8859_1 */
+ if (c == 0xb4) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbe) return 1;
+ break;
+
+ case ISO_8859_16:
+ if (c == 0xa1) return 1;
+ if (c == 0xa3) return 1;
+ if (c == 0xa6) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xac) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb2) return 1;
+ if (c == 0xb4) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbe) return 1;
+ if (c >= 0xc0 && c <= 0xde) return 1;
+ break;
+
+ case KOI8_R:
+ if (c == 0xb3) return 1;  /* extra byte that only KOI8_R has */
+ /* fall through: KOI8_R also uses the KOI8 range below */
+ case KOI8:
+ if (c >= 0xe0 && c <= 0xff) return 1;
+ break;
+
+ default:  /* unrecognised enc value */
+ exit(-1);  /* terminates the whole program instead of returning an error */
+ }
+
+ return 0;  /* no rule matched: not upper-case */
+}
+
+static int IsXDigit(int enc, int c)  /* 1 if byte c is an ASCII hexadecimal digit, otherwise 0; enc is not consulted */
+{
+ if (c >= 0x30 && c <= 0x39) return 1;  /* '0'..'9' */
+ if (c >= 0x41 && c <= 0x46) return 1;  /* 'A'..'F' */
+ if (c >= 0x61 && c <= 0x66) return 1;  /* 'a'..'f' */
+ return 0;
+}
+
+static int IsWord(int enc, int c)  /* 1 if byte c is classified as a word character in encoding enc, otherwise 0 */
+{
+ if (c >= 0x30 && c <= 0x39) return 1;  /* ASCII digits */
+ if (c >= 0x41 && c <= 0x5a) return 1;  /* ASCII 'A'..'Z' */
+ if (c == 0x5f) return 1;  /* underscore */
+ if (c >= 0x61 && c <= 0x7a) return 1;  /* ASCII 'a'..'z' */
+
+ switch (enc) {
+ case UNICODE_ISO_8859_1:
+ case ISO_8859_1:
+ case ISO_8859_9:
+ if (c == 0xaa) return 1;
+ if (c >= 0xb2 && c <= 0xb3) return 1;
+ if (c == 0xb5) return 1;
+ if (c >= 0xb9 && c <= 0xba) return 1;
+ if (c >= 0xbc && c <= 0xbe) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ break;
+
+ case ISO_8859_2:
+ if (c == 0xa1 || c == 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c >= 0xae && c <= 0xaf) return 1;
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbc) return 1;
+ if (c >= 0xbe && c <= 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_3:
+ if (c == 0xa1) return 1;
+ if (c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xaf) return 1;
+ if (c >= 0xb1 && c <= 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbd) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xc2) return 1;
+ if (c >= 0xc4 && c <= 0xcf) return 1;
+ if (c >= 0xd1 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xe2) return 1;
+ if (c >= 0xe4 && c <= 0xef) return 1;
+ if (c >= 0xf1 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_4:
+ if (c >= 0xa1 && c <= 0xa3) return 1;
+ if (c == 0xa5 || c == 0xa6) return 1;
+ if (c >= 0xa9 && c <= 0xac) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xb1 || c == 0xb3) return 1;
+ if (c == 0xb5 || c == 0xb6) return 1;
+ if (c >= 0xb9 && c <= 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_5:
+ if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;  /* same expression as the ISO_8859_5 test in IsUpper */
+ if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;  /* same expression as the ISO_8859_5 test in IsLower */
+ break;
+
+ case ISO_8859_6:
+ if (c >= 0xc1 && c <= 0xda) return 1;
+ if (c >= 0xe0 && c <= 0xea) return 1;  /* adjacent to the next range: together 0xe0..0xf2 */
+ if (c >= 0xeb && c <= 0xf2) return 1;
+ break;
+
+ case ISO_8859_7:
+ if (c == 0xb2 || c == 0xb3) return 1;
+ if (c == 0xb6) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c >= 0xbc && c <= 0xbf) return 1;
+ if (c == 0xc0) return 1;
+ if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
+ if (c >= 0xdc && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_8:
+ if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
+ if (c >= 0xbc && c <= 0xbe) return 1;
+ if (c >= 0xe0 && c <= 0xfa) return 1;
+ break;
+
+ case ISO_8859_10:
+ if (c >= 0xa1 && c <= 0xff) {  /* all of 0xa1..0xff except the five bytes excluded below */
+ if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
+ return 1;
+ }
+ break;
+
+ case ISO_8859_11:
+ if (c >= 0xa1 && c <= 0xda) return 1;
+ if (c >= 0xdf && c <= 0xfb) return 1;
+ break;
+
+ case ISO_8859_13:
+ if (c == 0xa8) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
+ if (c >= 0xbc && c <= 0xbe) return 1;
+ if (c == 0xb8) return 1;
+ if (c == 0xba) return 1;
+ if (c >= 0xbf && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xfe) return 1;
+ break;
+
+ case ISO_8859_14:
+ if (c >= 0xa1 && c <= 0xff) {  /* all of 0xa1..0xff except the six bytes listed next */
+ if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
+ c == 0xb6) return 0;
+ return 1;
+ }
+ break;
+
+ case ISO_8859_15:  /* first eight tests repeat the ISO_8859_1 list above */
+ if (c == 0xaa) return 1;
+ if (c >= 0xb2 && c <= 0xb3) return 1;
+ if (c == 0xb5) return 1;
+ if (c >= 0xb9 && c <= 0xba) return 1;
+ if (c >= 0xbc && c <= 0xbe) return 1;
+ if (c >= 0xc0 && c <= 0xd6) return 1;
+ if (c >= 0xd8 && c <= 0xf6) return 1;
+ if (c >= 0xf8 && c <= 0xff) return 1;
+ if (c == 0xa6) return 1;  /* this and the next three bytes are extra compared with ISO_8859_1 */
+ if (c == 0xa8) return 1;
+ if (c == 0xb4) return 1;
+ if (c == 0xb8) return 1;
+ break;
+
+ case ISO_8859_16:
+ if (c == 0xa1) return 1;
+ if (c == 0xa2) return 1;
+ if (c == 0xa3) return 1;
+ if (c == 0xa6) return 1;
+ if (c == 0xa8) return 1;
+ if (c == 0xaa) return 1;
+ if (c == 0xac) return 1;
+ if (c == 0xae) return 1;
+ if (c == 0xaf) return 1;
+ if (c == 0xb2) return 1;
+ if (c == 0xb3) return 1;
+ if (c == 0xb4) return 1;
+ if (c >= 0xb8 && c <= 0xba) return 1;
+ if (c == 0xbc) return 1;
+ if (c == 0xbd) return 1;
+ if (c == 0xbe) return 1;
+ if (c == 0xbf) return 1;
+ if (c >= 0xc0 && c <= 0xde) return 1;  /* adjacent to the next range: together 0xc0..0xff */
+ if (c >= 0xdf && c <= 0xff) return 1;
+ break;
+
+ case KOI8_R:
+ if (c == 0x9d) return 1;  /* bytes tested only for KOI8_R */
+ if (c == 0xa3 || c == 0xb3) return 1;
+ /* fall through: KOI8_R also uses the KOI8 range below */
+ case KOI8:
+ if (c >= 0xc0 && c <= 0xff) return 1;
+ break;
+
+ default:  /* unrecognised enc value */
+ exit(-1);  /* terminates the whole program instead of returning an error */
+ }
+
+ return 0;  /* no rule matched: not a word character */
+}
+
+static int IsAscii(int enc, int c)  /* 1 if c lies in 0x00..0x7f, otherwise 0; enc is not consulted */
+{
+ if (c >= 0x00 && c <= 0x7f) return 1;
+ return 0;
+}
+
+static int IsNewline(int enc, int c)  /* 1 only for byte 0x0a, otherwise 0; enc is not consulted */
+{
+ if (c == 0x0a) return 1;
+ return 0;
+}
+
+static int exec(FILE* fp, ENC_INFO* einfo)  /* writes the 256-entry ctype table for one encoding to fp as C source text; always returns 0 */
+{
+#define NCOL 8  /* values printed per output row; NOTE(review): never #undef'd, so it stays defined after this function */
+
+ int c, val, enc;
+
+ enc = einfo->num;  /* encoding id handed to every Is*() classifier below */
+
+ fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
+ einfo->name);  /* einfo->name becomes part of the emitted array identifier */
+
+ for (c = 0; c < 256; c++) {  /* one table entry per possible byte value */
+ val = 0;  /* OR of the ONIGENC_CTYPE_* flags that apply to byte c */
+ if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE;
+ if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
+ if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
+ if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
+ if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
+ if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
+ if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
+ if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
+ if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
+ if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
+ if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
+ if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
+ if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
+ if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
+
+ if (c % NCOL == 0) fputs(" ", fp);  /* leading indent at the start of each row */
+ fprintf(fp, "0x%04x", val);  /* four hex digits, zero padded */
+ if (c != 255) fputs(",", fp);  /* no comma after the final entry */
+ if (c != 0 && c % NCOL == (NCOL-1))  /* last column of a row; the c != 0 test is redundant because 0 % NCOL is never NCOL-1 */
+ fputs("\n", fp);
+ else
+ fputs(" ", fp);  /* separator between values on the same row */
+ }
+ fprintf(fp, "};\n");  /* closes the emitted array initializer */
+ return 0;  /* write errors on fp are not checked */
+}
+
+extern int main(int argc, char* argv[])  /* prints one ctype table per Info[] entry to stdout; argc and argv are not used */
+{
+ int i;
+ FILE* fp = stdout;  /* all generated text goes to standard output */
+
+ for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) {  /* NOTE(review): signed i is compared with an unsigned sizeof expression */
+ exec(fp, &Info[i]);  /* return value (always 0 in exec) is ignored */
+ }
+}  /* NOTE(review): function is declared int but has no return statement */
diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c
new file mode 100644
index 0000000..f7d7d52
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/sjis.c
@@ -0,0 +1,238 @@
+/**********************************************************************
+ sjis.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static const int EncLen_SJIS[] = {  /* indexed by a byte value; gives the character length (1 or 2) used when that byte starts a character */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00-0x0f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10-0x1f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x20-0x2f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x30-0x3f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x40-0x4f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x50-0x5f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x60-0x6f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x70-0x7f */
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x80-0x8f: 0x80 is single-byte, 0x81 onward starts a 2-byte character */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x90-0x9f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xa0-0xaf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xb0-0xbf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0-0xcf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0-0xdf */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xe0-0xef */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1  /* 0xf0-0xff: 2-byte leads end at 0xfc */
+};
+
+static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {  /* indexed by a byte value; 1 if the byte may be the second byte of a 2-byte character */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x00-0x0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x10-0x1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x20-0x2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x30-0x3f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x40-0x4f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x50-0x5f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x60-0x6f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  /* 0x70-0x7f: 0x7f is excluded */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x80-0x8f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x90-0x9f */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xa0-0xaf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xb0-0xbf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0-0xcf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0-0xdf */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xe0-0xef */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0  /* 0xf0-0xff: trail bytes end at 0xfc */
+};
+
+#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)  /* true when byte starts a 2-byte character; NOTE(review): byte is not parenthesised inside the subscript */
+#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]  /* nonzero when byte may be a second byte; NOTE(review): expansion has no outer parentheses */
+
+static int  /* returns the EncLen_SJIS entry (1 or 2) for the byte at p */
+sjis_mbc_enc_len(const UChar* p)
+{
+ return EncLen_SJIS[*p];  /* only the first byte is read */
+}
+
+static int  /* returns how many bytes encode the given code value: 1, 2, or 0 when the value is rejected */
+sjis_code_to_mbclen(OnigCodePoint code)
+{
+ if (code < 256) {
+ if (EncLen_SJIS[(int )code] == 1)  /* value is a byte that stands alone */
+ return 1;
+ else
+ return 0;  /* value is a lead byte, which is not a character by itself */
+ }
+ else if (code <= 0xffff) {
+ return 2;  /* any 16-bit value of 256 or more is accepted without checking its two bytes */
+ }
+ else
+ return 0;  /* wider than 16 bits */
+}
+
+static OnigCodePoint  /* combines the bytes of the character at p into one code value, first byte most significant */
+sjis_mbc_to_code(const UChar* p, const UChar* end)
+{
+ int c, i, len;
+ OnigCodePoint n;
+
+ len = enc_len(ONIG_ENCODING_SJIS, p);  /* enc_len is declared elsewhere; presumably yields the EncLen_SJIS length -- TODO confirm */
+ c = *p++;  /* NOTE(review): the first byte is read without comparing p against end */
+ n = c;
+ if (len == 1) return n;
+
+ for (i = 1; i < len; i++) {
+ if (p >= end) break;  /* truncated input: return what has been accumulated so far */
+ c = *p++;
+ n <<= 8; n += c;  /* append the next byte as the low 8 bits */
+ }
+ return n;
+}
+
+static int  /* stores code into buf as one or two bytes and returns the number of bytes written */
+sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar *p = buf;
+
+ if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));  /* high byte is emitted only when nonzero */
+ *p++ = (UChar )(code & 0xff);  /* low byte is always emitted; bits above 16 are ignored */
+
+#if 0
+ if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))  /* disabled check that the written length matches the lead byte */
+ return REGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+ return p - buf;
+}
+
+static int  /* copies one character from *pp into lower (lower-casing it only in the ASCII branch), advances *pp, and returns the byte count */
+sjis_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)  /* end is not referenced in this function */
+{
+ const UChar* p = *pp;
+
+ if (ONIGENC_IS_MBC_ASCII(p)) {  /* macro defined elsewhere; presumably tests for a single ASCII byte -- TODO confirm */
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);  /* case folding happens only when the flag bit is set */
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp)++;  /* consume the single byte */
+ return 1;
+ }
+ else {
+ int len = enc_len(ONIG_ENCODING_SJIS, p);
+
+ if (lower != p) {  /* skip the copy when source and destination are the same buffer */
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;  /* bytes are copied unchanged */
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int  /* thin wrapper: forwards to onigenc_mbn_is_mbc_ambiguous with the Shift_JIS encoding */
+sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);  /* helper is defined outside this file */
+
+}
+
+static int  /* reports whether code belongs to any of the character classes in ctype */
+sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);  /* values below 128 are decided by the ASCII macro */
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {  /* only these three classes can match values of 128 and above */
+ return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);  /* TRUE exactly for values that take two bytes */
+ }
+ }
+
+ return FALSE;  /* any other class never matches values of 128 and above */
+}
+
+static UChar*  /* returns the start of the character that contains position s, scanning no further back than start */
+sjis_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ const UChar *p;
+ int len;
+
+ if (s <= start) return (UChar* )s;  /* nothing precedes s, so it is returned unchanged */
+ p = s;
+
+ if (SJIS_ISMB_TRAIL(*p)) {  /* *s could be a second byte, so look at what precedes it */
+ while (p > start) {  /* walk back over the run of lead-byte values before s */
+ if (! SJIS_ISMB_FIRST(*--p)) {
+ p++;  /* step forward again: the run starts just after this non-lead byte */
+ break;
+ }
+ }
+ }
+ len = enc_len(ONIG_ENCODING_SJIS, p);
+ if (p + len > s) return (UChar* )p;  /* the character starting at p covers s */
+ p += len;
+ return (UChar* )(p + ((s - p) & ~1));  /* skip an even number of bytes, i.e. treat the rest of the run as 2-byte units */
+}
+
+static int  /* FALSE when the byte at s could be a trail byte, TRUE otherwise; end is not referenced */
+sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ const UChar c = *s;  /* only the single byte at s is examined */
+ return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
+}
+
+OnigEncodingType OnigEncodingSJIS = {  /* Shift_JIS encoding descriptor; initializers are positional, so their order must match the OnigEncodingType declaration */
+ sjis_mbc_enc_len,
+ "Shift_JIS", /* name */
+ 2, /* max byte length */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,  /* same flag that sjis_mbc_to_normalize tests before lower-casing */
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,  /* onigenc_* entries are shared helpers defined outside this file */
+ sjis_mbc_to_code,
+ sjis_code_to_mbclen,
+ sjis_code_to_mbc,
+ sjis_mbc_to_normalize,
+ sjis_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ sjis_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ sjis_left_adjust_char_head,
+ sjis_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c
new file mode 100644
index 0000000..a8cf539
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/unicode.c
@@ -0,0 +1,3403 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {  /* one 16-bit flag word per byte value, eight values per row; layout looks like mktable.c output -- TODO confirm */
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,  /* 0x00-0x07 */
+ 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,  /* 0x20-0x27 */
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,  /* 0x40-0x47 */
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,  /* 0x60-0x67 */
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,  /* 0x80-0x87 */
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,  /* 0xa0-0xa7 */
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,  /* 0xc0-0xc7 */
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,  /* 0xe0-0xe7 */
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2  /* 0xf8-0xff */
+};
+
+static const OnigCodePoint CRAlnum[] = {  /* code point ranges for the Alnum class: a pair count followed by that many (first, last) pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 414,  /* pair count when the full list below is compiled in */
+#else
+ 9,  /* pair count for the short list that stops at 0x0236 */
+#endif
+ 0x0030, 0x0039,  /* each row is one range written as first, last; a single code point repeats its value */
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,  /* separator is needed only when further ranges follow */
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlnum */
+
+static const OnigCodePoint CRAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 396,
+#else
+ 8,
+#endif
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlpha */
+
+static const OnigCodePoint CRBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 3,
+#endif
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRBlank */
+
+static const OnigCodePoint CRCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 19,
+#else
+ 3,
+#endif
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRCntrl */
+
+static const OnigCodePoint CRDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 23,
+#else
+ 1,
+#endif
+ 0x0030, 0x0039
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRDigit */
+
+static const OnigCodePoint CRGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 2,
+#endif
+ 0x0021, 0x007e,
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRGraph */
+
+static const OnigCodePoint CRLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 424,
+#else
+ 6,
+#endif
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRLower */
+
+static const OnigCodePoint CRPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x007e,
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPrint */
+
+/*
+ * CRPunct: code point ranges used for ONIGENC_CTYPE_PUNCT.
+ * Layout: the first element is the number of ranges; it is followed by that
+ * many (first, last) code point pairs in ascending order.  A single code
+ * point is listed with first == last.
+ * Without USE_UNICODE_FULL_RANGE_CTYPE only the first 14 ranges (all below
+ * U+0100) are compiled in; with it the table holds 86 ranges.
+ */
+static const OnigCodePoint CRPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 86,
+#else
+ 14,
+#endif
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPunct */
+
+/*
+ * CRSpace: code point ranges used for ONIGENC_CTYPE_SPACE.
+ * Layout: range count first, then that many (first, last) pairs in
+ * ascending order.  Only the first 4 ranges (all below U+0100) are present
+ * unless USE_UNICODE_FULL_RANGE_CTYPE is defined, which extends it to 11.
+ */
+static const OnigCodePoint CRSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 11,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRSpace */
+
+/*
+ * CRUpper: code point ranges used for ONIGENC_CTYPE_UPPER.
+ * Layout: range count first, then that many (first, last) pairs in
+ * ascending order; a single code point is listed with first == last.
+ * Only the first 3 ranges (all below U+0100) are present unless
+ * USE_UNICODE_FULL_RANGE_CTYPE is defined, which extends it to 421.
+ */
+static const OnigCodePoint CRUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 421,
+#else
+ 3,
+#endif
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRUpper */
+
+/*
+ * CRXDigit: code point ranges used for ONIGENC_CTYPE_XDIGIT.
+ * Range count first, then (first, last) pairs.  The table is the same with
+ * or without USE_UNICODE_FULL_RANGE_CTYPE, so no conditional is needed.
+ */
+static const OnigCodePoint CRXDigit[] = {
+  3,
+  0x0030, 0x0039,   /* '0'..'9' */
+  0x0041, 0x0046,   /* 'A'..'F' */
+  0x0061, 0x0066    /* 'a'..'f' */
+}; /* end of CRXDigit */
+
+/*
+ * CRASCII: code point ranges used for ONIGENC_CTYPE_ASCII.
+ * Range count first, then (first, last) pairs.  The table is the same with
+ * or without USE_UNICODE_FULL_RANGE_CTYPE, so no conditional is needed.
+ */
+static const OnigCodePoint CRASCII[] = {
+  1,
+  0x0000, 0x007f
+}; /* end of CRASCII */
+
+/*
+ * CRWord: code point ranges used for ONIGENC_CTYPE_WORD.
+ * Layout: range count first, then that many (first, last) pairs in
+ * ascending order; a single code point is listed with first == last.
+ * Without USE_UNICODE_FULL_RANGE_CTYPE the table has 12 ranges and the last
+ * one (0x00f8..0x7fffffff) covers every code point from U+00F8 upwards.
+ * With it, the table has 436 ranges.
+ */
+static const OnigCodePoint CRWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 436,
+#else
+ 12,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* USE_UNICODE_FULL_RANGE_CTYPE is defined */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRWord */
+
+
+/*
+ * Tests whether `code` belongs to the character class `ctype`.
+ *
+ * Code points below 256 are answered by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE.
+ * For larger code points:
+ *  - with USE_UNICODE_FULL_RANGE_CTYPE the matching CR* range table is
+ *    searched via onig_is_in_code_range(); XDIGIT, ASCII and NEWLINE always
+ *    yield FALSE and an unrecognized ctype yields ONIGENCERR_TYPE_BUG;
+ *  - otherwise the result is TRUE exactly when the WORD bit is set in ctype.
+ */
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+  const OnigCodePoint* ranges;
+#endif
+
+  if (code < 256) {
+    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+  }
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+
+  /* Select the table first; the lookup itself is shared below. */
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:  ranges = CRAlpha;  break;
+  case ONIGENC_CTYPE_BLANK:  ranges = CRBlank;  break;
+  case ONIGENC_CTYPE_CNTRL:  ranges = CRCntrl;  break;
+  case ONIGENC_CTYPE_DIGIT:  ranges = CRDigit;  break;
+  case ONIGENC_CTYPE_GRAPH:  ranges = CRGraph;  break;
+  case ONIGENC_CTYPE_LOWER:  ranges = CRLower;  break;
+  case ONIGENC_CTYPE_PRINT:  ranges = CRPrint;  break;
+  case ONIGENC_CTYPE_PUNCT:  ranges = CRPunct;  break;
+  case ONIGENC_CTYPE_SPACE:  ranges = CRSpace;  break;
+  case ONIGENC_CTYPE_UPPER:  ranges = CRUpper;  break;
+  case ONIGENC_CTYPE_WORD:   ranges = CRWord;   break;
+  case ONIGENC_CTYPE_ALNUM:  ranges = CRAlnum;  break;
+
+  /* These classes never match a code point >= 256. */
+  case ONIGENC_CTYPE_XDIGIT:
+  case ONIGENC_CTYPE_ASCII:
+  case ONIGENC_CTYPE_NEWLINE:
+    return FALSE;
+
+  default:
+    return ONIGENCERR_TYPE_BUG;
+  }
+
+  return onig_is_in_code_range((UChar* )ranges, code);
+
+#else
+
+  return ((ctype & ONIGENC_CTYPE_WORD) != 0) ? TRUE : FALSE;
+
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}
+
+/*
+ * Hands back the code range tables for the character class `ctype`.
+ *
+ * *sbr is always set to an empty table (a lone 0 count).  On success *mbr is
+ * set to the CR* table for the class and 0 is returned.  For a ctype with no
+ * table here, ONIGENCERR_TYPE_BUG is returned and *mbr is left untouched.
+ */
+extern int
+onigenc_unicode_get_ctype_code_range(int ctype,
+                          const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+{
+  static const OnigCodePoint EmptyRange[] = { 0 };
+  const OnigCodePoint* table;
+
+  *sbr = EmptyRange;
+
+  switch (ctype) {
+  case ONIGENC_CTYPE_ALPHA:   table = CRAlpha;   break;
+  case ONIGENC_CTYPE_BLANK:   table = CRBlank;   break;
+  case ONIGENC_CTYPE_CNTRL:   table = CRCntrl;   break;
+  case ONIGENC_CTYPE_DIGIT:   table = CRDigit;   break;
+  case ONIGENC_CTYPE_GRAPH:   table = CRGraph;   break;
+  case ONIGENC_CTYPE_LOWER:   table = CRLower;   break;
+  case ONIGENC_CTYPE_PRINT:   table = CRPrint;   break;
+  case ONIGENC_CTYPE_PUNCT:   table = CRPunct;   break;
+  case ONIGENC_CTYPE_SPACE:   table = CRSpace;   break;
+  case ONIGENC_CTYPE_UPPER:   table = CRUpper;   break;
+  case ONIGENC_CTYPE_XDIGIT:  table = CRXDigit;  break;
+  case ONIGENC_CTYPE_WORD:    table = CRWord;    break;
+  case ONIGENC_CTYPE_ASCII:   table = CRASCII;   break;
+  case ONIGENC_CTYPE_ALNUM:   table = CRAlnum;   break;
+
+  default:
+    return ONIGENCERR_TYPE_BUG;
+  }
+
+  *mbr = table;
+  return 0;
+}
diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c
new file mode 100644
index 0000000..6ab80a6
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_be.c
@@ -0,0 +1,232 @@
+/**********************************************************************
+ utf16_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/*
+ * Classify a byte by the surrogate range it falls in: 0xD8..0xDB is the range
+ * of the high-order byte of a leading surrogate, 0xDC..0xDF that of a
+ * trailing surrogate.  The argument is parenthesized so that any expression
+ * can be passed safely; note that it is still evaluated twice.
+ */
+#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
+
+/*
+ * Byte length of a UTF-16BE character, indexed by its first byte (256
+ * entries, 16 per row).  Entries 0xD8..0xDB are 4 (the byte range tested by
+ * UTF16_IS_SURROGATE_FIRST); every other entry is 2.
+ */
+static const int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, /* 0xd0-0xdf: 0xd8-0xdb start a 4-byte character */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/* Returns the byte length (2 or 4) of the character whose first byte is *p. */
+static int
+utf16be_mbc_enc_len(const UChar* p)
+{
+  const UChar lead = p[0];
+
+  return EncLen_UTF16[lead];
+}
+
+/*
+ * Returns 1 when the two bytes at p encode a line terminator, 0 otherwise
+ * (including when fewer than two bytes remain before `end`).
+ * U+000A always counts; with USE_UNICODE_ALL_LINE_TERMINATORS so do U+000D,
+ * U+0085, U+2028 and U+2029.
+ */
+static int
+utf16be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  UChar hi, lo;
+
+  if (p + 1 >= end)
+    return 0;
+
+  hi = p[0];
+  lo = p[1];
+
+  if (hi == 0x00 && lo == 0x0a)
+    return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (hi == 0x00 && (lo == 0x0d || lo == 0x85))
+    return 1;
+  if (hi == 0x20 && (lo == 0x29 || lo == 0x28))
+    return 1;
+#endif
+
+  return 0;
+}
+
+/*
+ * Decodes the UTF-16BE character at p into a code point.
+ * A first byte in 0xD8..0xDB is treated as the start of a 4-byte surrogate
+ * pair; anything else is read as a plain 2-byte unit.
+ * NOTE(review): `end` is not consulted, so the caller must guarantee that
+ * 2 (or 4) bytes are readable at p.
+ */
+static OnigCodePoint
+utf16be_mbc_to_code(const UChar* p, const UChar* end)
+{
+  OnigCodePoint code;
+  int plane, mid;
+
+  if (! UTF16_IS_SURROGATE_FIRST(*p)) {
+    code = p[0] * 256 + p[1];
+    return code;
+  }
+
+  /* Bits 16 and up: the pair stores (plane - 1), hence the + 1. */
+  plane = ((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1;
+  /* Bits 8..15: six bits from byte 1, two bits from byte 2. */
+  mid = ((p[1] & 0x3f) << 2) + (p[2] - 0xdc);
+
+  code = (plane << 16) + (mid << 8) + p[3];
+  return code;
+}
+
+/* Returns how many bytes `code` occupies: 4 above U+FFFF, otherwise 2. */
+static int
+utf16be_code_to_mbclen(OnigCodePoint code)
+{
+  if (code > 0xffff)
+    return 4;
+
+  return 2;
+}
+
+/*
+ * Encodes `code` as UTF-16BE into buf and returns the number of bytes
+ * written: 4 (a surrogate pair) for code points above U+FFFF, otherwise 2.
+ * buf must have room for the bytes written.
+ *
+ * The surrogate layout is the exact inverse of utf16be_mbc_to_code():
+ *   byte0 = 0xD8 + ((plane - 1) >> 2)
+ *   byte1 = (((plane - 1) & 3) << 6) | (bits 15..10 of code)
+ *   byte2 = 0xDC + (bits 9..8 of code)
+ *   byte3 = bits 7..0 of code
+ *
+ * Two defects of the previous version are fixed here: it used the plane
+ * number instead of (plane - 1), so U+10000 came out as D840 DC00 rather
+ * than D800 DC00, and it masked the trailing surrogate's high byte with 0x02
+ * instead of 0x03, which dropped bit 8 of the code point.
+ */
+static int
+utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  UChar* p = buf;
+
+  if (code > 0xffff) {
+    unsigned int plane, high;
+
+    /* Surrogates encode (code - 0x10000), i.e. the plane number minus one. */
+    plane = (unsigned int )(code >> 16) - 1;
+    high = (unsigned int )((code & 0xff00) >> 8);
+
+    *p++ = (UChar )((plane >> 2) + 0xd8);
+    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
+    *p++ = (UChar )((high & 0x03) + 0xdc);
+    *p   = (UChar )(code & 0xff);
+    return 4;
+  }
+  else {
+    *p++ = (UChar )((code & 0xff00) >> 8);
+    *p++ = (UChar )(code & 0xff);
+    return 2;
+  }
+}
+
+/*
+ * Writes the case-normalized form of the character at *pp into `lower`,
+ * advances *pp past that character and returns its byte length.
+ *
+ * Only characters whose first byte is 0x00 (U+0000..U+00FF) can be changed:
+ * the low byte is mapped through ONIGENC_ISO_8859_1_TO_LOWER_CASE when
+ * `flag` enables folding for its half of the range (ASCII vs. non-ASCII).
+ * Every other character is copied byte for byte; the copy is skipped when
+ * `lower` already points at the source.  `end` is not consulted.
+ */
+static int
+utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  int fold, n, k;
+
+  if (*src != 0) {
+    n = EncLen_UTF16[*src];
+    if (lower != src) {
+      for (k = 0; k < n; k++) {
+        lower[k] = src[k];
+      }
+    }
+    (*pp) += n;
+    return n; /* return byte length of converted char to lower */
+  }
+
+  src++;             /* step to the low byte */
+  *lower++ = '\0';   /* high byte stays 0x00 */
+
+  fold = (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+           ONIGENC_IS_MBC_ASCII(src)) ||
+          ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+           !ONIGENC_IS_MBC_ASCII(src)));
+
+  if (fold) {
+    *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  }
+  else {
+    *lower = *src;
+  }
+
+  (*pp) += 2;
+  return 2; /* return byte length of converted char to lower */
+}
+
+/*
+ * Reports whether the character at *pp is case-ambiguous, and advances *pp
+ * past it (by EncLen_UTF16 of its first byte).
+ *
+ * Only characters whose first byte is 0x00 (U+0000..U+00FF) are examined,
+ * and only when `flag` enables case matching for their half of that range
+ * (ASCII vs. non-ASCII).  Such a character is ambiguous when it is an upper
+ * or lower case letter, except for the lower case letters in 0xAA..0xBA,
+ * which have no convertible counterpart.  Everything else yields FALSE.
+ * `end` is not consulted.
+ *
+ * The previous version tested `(v | ONIGENC_CTYPE_LOWER) != 0`, which is
+ * true for every input whenever ONIGENC_CTYPE_LOWER is non-zero; as a result
+ * digits, punctuation and other non-letters were reported as ambiguous and
+ * the final return was unreachable.  The lower-case test is now made
+ * explicitly.
+ */
+static int
+utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += EncLen_UTF16[*p];
+
+  if (*p == 0) {
+    int c, is_letter, is_lower;
+
+    p++;
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      is_letter = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                      (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      /* Queried separately so the test does not depend on whether the
+         macro yields the masked bits or a plain truth value. */
+      is_lower = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c, ONIGENC_CTYPE_LOWER);
+
+      if (is_lower != 0) {
+        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+        if (c >= 0xaa && c <= 0xba)
+          return FALSE;
+        else
+          return TRUE;
+      }
+      return (is_letter != 0 ? TRUE : FALSE);
+    }
+  }
+
+  return FALSE;
+}
+
+/*
+ * Moves s back to the first byte of the character that contains it.
+ * s is returned unchanged when it is at or before `start`.  Otherwise it is
+ * first aligned down to an even offset from `start`; if the byte there is in
+ * the trailing-surrogate range and s is more than one byte past `start`, it
+ * steps back two more bytes.
+ */
+static UChar*
+utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar* head = s;
+
+  if (head > start) {
+    if ((head - start) % 2 == 1) {
+      head--;
+    }
+
+    if (UTF16_IS_SURROGATE_SECOND(*head) && head > start + 1) {
+      head -= 2;
+    }
+  }
+
+  return (UChar* )head;
+}
+
+/*
+ * Encoding descriptor for UTF-16BE.  The initializer is positional, so the
+ * order of entries must match the field order of OnigEncodingType.  It
+ * combines the utf16be_* functions defined in this file with shared
+ * onigenc_* helpers defined elsewhere.
+ */
+OnigEncodingType OnigEncodingUTF16_BE = {
+ utf16be_mbc_enc_len,
+ "UTF-16BE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16be_is_mbc_newline,
+ utf16be_mbc_to_code,
+ utf16be_code_to_mbclen,
+ utf16be_code_to_mbc,
+ utf16be_mbc_to_normalize,
+ utf16be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c
new file mode 100644
index 0000000..2248e49
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_le.c
@@ -0,0 +1,230 @@
+/**********************************************************************
+ utf16_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Classify the high-order byte of a UTF-16 code unit: 0xd8-0xdb opens a
+ * surrogate pair, 0xdc-0xdf is the second half of one.  The argument is
+ * parenthesized so that compound expressions expand with the intended
+ * precedence; note that it is still evaluated twice. */
+#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
+
+/* Byte length of a UTF-16 character, indexed by a byte value (the functions
+ * in this file index it with the second byte of the first 16-bit unit).
+ * Entries 0xd8-0xdb are 4 (start of a surrogate pair); all others are 2. */
+static const int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/* Number of bytes needed to encode code in UTF-16LE: 4 when code is above
+ * 0xffff, otherwise 2. */
+static int
+utf16le_code_to_mbclen(OnigCodePoint code)
+{
+  if (code <= 0xffff)
+    return 2;
+
+  return 4;
+}
+
+/* Byte length of the character starting at p, looked up from the second
+ * byte of its first 16-bit unit. */
+static int
+utf16le_mbc_enc_len(const UChar* p)
+{
+  const UChar hi = p[1];
+
+  return EncLen_UTF16[hi];
+}
+
+/* Return 1 when the UTF-16LE character at p is a line terminator, else 0.
+ * Only LF (0x000a) is recognized unless USE_UNICODE_ALL_LINE_TERMINATORS is
+ * defined, which adds 0x000d, 0x0085, 0x2028 and 0x2029. */
+static int
+utf16le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  /* A complete 16-bit unit must lie before end. */
+  if (!(p + 1 < end))
+    return 0;
+
+  if (p[0] == 0x0a && p[1] == 0x00)
+    return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (p[1] == 0x00 && (p[0] == 0x0d || p[0] == 0x85))
+    return 1;
+  if (p[1] == 0x20 && (p[0] == 0x28 || p[0] == 0x29))
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Decode the UTF-16LE character at p into a code point.  When the high byte
+ * of the first unit is in 0xd8-0xdb the character is taken to be a surrogate
+ * pair and p[2] / p[3] are read as well.  end is not consulted. */
+static OnigCodePoint
+utf16le_mbc_to_code(const UChar* p, const UChar* end)
+{
+  UChar lo = p[0];
+  UChar hi = p[1];
+  OnigCodePoint code;
+
+  if (!UTF16_IS_SURROGATE_FIRST(hi)) {
+    /* Single unit: high byte * 256 + low byte. */
+    code = hi * 256 + lo;
+  }
+  else {
+    /* Bits 16 and up, rebuilt from the leading surrogate (plus one). */
+    int plane = ((hi - 0xd8) << 2) + ((lo & 0xc0) >> 6) + 1;
+    /* Bits 8-15: six bits from the leading unit, two from the trailing. */
+    int middle = ((lo & 0x3f) << 2) + (p[3] - 0xdc);
+
+    code = (plane << 16) + (middle << 8) + p[2];
+  }
+
+  return code;
+}
+
+/* Encode code as UTF-16LE into buf and return the number of bytes written:
+ * 2 for code <= 0xffff, 4 (a surrogate pair) otherwise.
+ *
+ * For a surrogate pair the value encoded is (code - 0x10000), so the plane
+ * number is reduced by one before it is packed into the leading surrogate,
+ * and the high byte of the trailing surrogate carries the two low bits of
+ * the middle byte (high & 0x03).  This makes the function the inverse of
+ * utf16le_mbc_to_code(), which adds the one back when decoding. */
+static int
+utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  UChar* p = buf;
+
+  if (code > 0xffff) {
+    unsigned int plane, high;
+
+    plane = (unsigned int )(code >> 16) - 1;
+    high = (unsigned int )((code & 0xff00) >> 8);
+
+    /* leading surrogate, low byte first */
+    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
+    *p++ = (UChar )((plane >> 2) + 0xd8);
+    /* trailing surrogate, low byte first */
+    *p++ = (UChar )(code & 0xff);
+    *p = (UChar )((high & 0x03) + 0xdc);
+    return 4;
+  }
+  else {
+    *p++ = (UChar )(code & 0xff);
+    *p++ = (UChar )((code & 0xff00) >> 8);
+    return 2;
+  }
+}
+
+/* Copy the character at *pp into lower.  A character whose high byte is zero
+ * is lower-cased with the ISO-8859-1 mapping when flag enables case matching
+ * for its class (ASCII or non-ASCII); any other character is copied as is.
+ * *pp is advanced past the character and the number of bytes written to
+ * lower is returned.  end is not consulted. */
+static int
+utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  int nbytes, k, fold;
+
+  if (src[1] != 0) {
+    /* High byte set: pass the bytes through unchanged. */
+    nbytes = EncLen_UTF16[src[1]];
+    if (lower != src) {
+      for (k = 0; k < nbytes; k++)
+        lower[k] = src[k];
+    }
+    (*pp) += nbytes;
+    return nbytes; /* return byte length of converted char to lower */
+  }
+
+  lower[1] = '\0';
+
+  /* Pick the flag bit that governs this character's class. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  else
+    lower[0] = *src;
+
+  (*pp) += 2;
+  return 2; /* return byte length of converted char to lower */
+}
+
+/* Report whether the character at *pp is case-ambiguous, and advance *pp
+ * past it.  Only characters whose high byte is zero are examined, and only
+ * when flag enables case matching for their class (ASCII or non-ASCII);
+ * everything else yields FALSE.  end is not consulted.
+ *
+ * The ctype lookup result v is tested directly.  OR-ing it with
+ * ONIGENC_CTYPE_LOWER would make the test succeed for every character
+ * (assuming that constant is a non-zero flag), so digits and punctuation
+ * would be reported as ambiguous. */
+static int
+utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += EncLen_UTF16[*(p+1)];
+
+  if (*(p+1) == 0) {
+    int c, v;
+
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE; /* lookup matched neither UPPER nor LOWER */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Move s back to the first byte of the UTF-16LE character that contains it.
+ * start marks the beginning of the buffer; a position at or before start is
+ * returned unchanged. */
+static UChar*
+utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar* head = s;
+
+  if (head <= start)
+    return (UChar* )head;
+
+  /* An odd offset means we sit on the second byte of a 16-bit unit. */
+  if ((head - start) % 2 == 1)
+    head -= 1;
+
+  /* In little-endian order the high byte is the second one.  On the second
+   * half of a surrogate pair, step back one more unit if there is room. */
+  if (UTF16_IS_SURROGATE_SECOND(head[1]) && head > start + 1)
+    head -= 2;
+
+  return (UChar* )head;
+}
+
+/* Encoding descriptor for UTF-16LE: the encoding name, the byte-length
+ * limits, the supported ambiguity flags, the meta-character table and the
+ * callbacks for this encoding.
+ * NOTE(review): the meaning of each unlabeled slot is fixed by the
+ * OnigEncodingType declaration, which is not visible here -- confirm the
+ * order against the header before reordering anything. */
+OnigEncodingType OnigEncodingUTF16_LE = {
+ utf16le_mbc_enc_len,
+ "UTF-16LE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16le_is_mbc_newline,
+ utf16le_mbc_to_code,
+ utf16le_code_to_mbclen,
+ utf16le_code_to_mbc,
+ utf16le_mbc_to_normalize,
+ utf16le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c
new file mode 100644
index 0000000..75133ca
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_be.c
@@ -0,0 +1,187 @@
+/**********************************************************************
+ utf32_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Every UTF-32BE character occupies four bytes; p is not examined. */
+static int
+utf32be_mbc_enc_len(const UChar* p)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Return 1 when the UTF-32BE character at p is a line terminator, else 0.
+ * Only LF (0x0000000a) is recognized unless
+ * USE_UNICODE_ALL_LINE_TERMINATORS is defined, which adds 0x0d, 0x85,
+ * 0x2028 and 0x2029. */
+static int
+utf32be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  /* All four bytes must lie before end. */
+  if (!(p + 3 < end))
+    return 0;
+
+  /* Every terminator tested below has its two leading bytes equal to 0. */
+  if (p[0] != 0 || p[1] != 0)
+    return 0;
+
+  if (p[2] == 0 && p[3] == 0x0a)
+    return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (p[2] == 0 && (p[3] == 0x0d || p[3] == 0x85))
+    return 1;
+  if (p[2] == 0x20 && (p[3] == 0x28 || p[3] == 0x29))
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Assemble the four big-endian bytes at p into a code point.  end is not
+ * consulted.
+ *
+ * Each byte is widened to OnigCodePoint before it is shifted so that the
+ * arithmetic is not carried out in (signed) int, which would overflow when
+ * p[0] is 0x80 or larger.
+ * NOTE(review): assumes OnigCodePoint is an unsigned type of at least 32
+ * bits -- confirm against its typedef. */
+static OnigCodePoint
+utf32be_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return ((OnigCodePoint )p[0] << 24)
+       | ((OnigCodePoint )p[1] << 16)
+       | ((OnigCodePoint )p[2] <<  8)
+       |  (OnigCodePoint )p[3];
+}
+
+/* Every code point takes four bytes in UTF-32BE; code is not examined. */
+static int
+utf32be_code_to_mbclen(OnigCodePoint code)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Store code into buf as four bytes, most significant byte first, and
+ * return the number of bytes written (always 4). */
+static int
+utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )((code >> 24) & 0xff);
+  buf[1] = (UChar )((code >> 16) & 0xff);
+  buf[2] = (UChar )((code >>  8) & 0xff);
+  buf[3] = (UChar )( code        & 0xff);
+
+  return 4;
+}
+
+/* Copy the character at *pp into lower.  A character whose three leading
+ * bytes are zero is lower-cased with the ISO-8859-1 mapping when flag
+ * enables case matching for its class (ASCII or non-ASCII); any other
+ * character is copied as is.  *pp is advanced by four bytes and 4 is
+ * returned.  end is not consulted. */
+static int
+utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  const UChar* last;
+  int k, fold;
+
+  if (src[0] != 0 || src[1] != 0 || src[2] != 0) {
+    /* Value above 0xff: pass the four bytes through unchanged. */
+    if (lower != src) {
+      for (k = 0; k < 4; k++)
+        lower[k] = src[k];
+    }
+    (*pp) += 4;
+    return 4; /* return byte length of converted char to lower */
+  }
+
+  lower[0] = '\0';
+  lower[1] = '\0';
+  lower[2] = '\0';
+
+  /* The only significant byte is the last one. */
+  last = src + 3;
+  if (ONIGENC_IS_MBC_ASCII(last))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    lower[3] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*last);
+  else
+    lower[3] = *last;
+
+  (*pp) += 4;
+  return 4; /* return byte length of converted char to lower */
+}
+
+/* Report whether the character at *pp is case-ambiguous, and advance *pp by
+ * four bytes.  Only characters whose three leading bytes are zero are
+ * examined, and only when flag enables case matching for their class (ASCII
+ * or non-ASCII); everything else yields FALSE.  end is not consulted.
+ *
+ * The ctype lookup result v is tested directly.  OR-ing it with
+ * ONIGENC_CTYPE_LOWER would make the test succeed for every character
+ * (assuming that constant is a non-zero flag), so digits and punctuation
+ * would be reported as ambiguous. */
+static int
+utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += 4;
+
+  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
+    int c, v;
+
+    p += 3;   /* the only significant byte is the last one */
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE; /* lookup matched neither UPPER nor LOWER */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Round s down to the start of the four-byte unit that contains it,
+ * counting from start.  A position at or before start is returned as is. */
+static UChar*
+utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  int back;
+
+  if (s > start) {
+    back = (s - start) % 4;
+    s -= back;
+  }
+
+  return (UChar* )s;
+}
+
+/* Encoding descriptor for UTF-32BE: the encoding name, the byte-length
+ * limits, the supported ambiguity flags, the meta-character table and the
+ * callbacks for this encoding.
+ * NOTE(review): the meaning of each unlabeled slot is fixed by the
+ * OnigEncodingType declaration, which is not visible here -- confirm the
+ * order against the header before reordering anything. */
+OnigEncodingType OnigEncodingUTF32_BE = {
+ utf32be_mbc_enc_len,
+ "UTF-32BE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32be_is_mbc_newline,
+ utf32be_mbc_to_code,
+ utf32be_code_to_mbclen,
+ utf32be_code_to_mbc,
+ utf32be_mbc_to_normalize,
+ utf32be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c
new file mode 100644
index 0000000..21dca10
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_le.c
@@ -0,0 +1,185 @@
+/**********************************************************************
+ utf32_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* Every UTF-32LE character occupies four bytes; p is not examined. */
+static int
+utf32le_mbc_enc_len(const UChar* p)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Return 1 when the UTF-32LE character at p is a line terminator, else 0.
+ * Only LF (0x0000000a) is recognized unless
+ * USE_UNICODE_ALL_LINE_TERMINATORS is defined, which adds 0x0d, 0x85,
+ * 0x2028 and 0x2029.  Nothing is read unless all four bytes lie before
+ * end. */
+static int
+utf32le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p + 3 < end) {
+    if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
+      return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    /* The third byte must be compared by value: test *(p+2), not the
+     * pointer (p+2) itself. */
+    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00
+        && *(p+2) == 0x00 && *(p+3) == 0x00)
+      return 1;
+    if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
+        && *(p+2) == 0x00 && *(p+3) == 0x00)
+      return 1;
+#endif
+  }
+  return 0;
+}
+
+/* Assemble the four little-endian bytes at p into a code point.  end is not
+ * consulted.
+ *
+ * Each byte is widened to OnigCodePoint before it is shifted so that the
+ * arithmetic is not carried out in (signed) int, which would overflow when
+ * p[3] is 0x80 or larger.
+ * NOTE(review): assumes OnigCodePoint is an unsigned type of at least 32
+ * bits -- confirm against its typedef. */
+static OnigCodePoint
+utf32le_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return ((OnigCodePoint )p[3] << 24)
+       | ((OnigCodePoint )p[2] << 16)
+       | ((OnigCodePoint )p[1] <<  8)
+       |  (OnigCodePoint )p[0];
+}
+
+/* Every code point takes four bytes in UTF-32LE; code is not examined. */
+static int
+utf32le_code_to_mbclen(OnigCodePoint code)
+{
+  enum { UTF32_CHAR_BYTES = 4 };
+
+  return UTF32_CHAR_BYTES;
+}
+
+/* Store code into buf as four bytes, least significant byte first, and
+ * return the number of bytes written (always 4). */
+static int
+utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  buf[0] = (UChar )( code        & 0xff);
+  buf[1] = (UChar )((code >>  8) & 0xff);
+  buf[2] = (UChar )((code >> 16) & 0xff);
+  buf[3] = (UChar )((code >> 24) & 0xff);
+
+  return 4;
+}
+
+/* Copy the character at *pp into lower.  A character whose three trailing
+ * bytes are zero is lower-cased with the ISO-8859-1 mapping when flag
+ * enables case matching for its class (ASCII or non-ASCII); any other
+ * character is copied as is.  *pp is advanced by four bytes and 4 is
+ * returned.  end is not consulted. */
+static int
+utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+                         UChar* lower)
+{
+  const UChar* src = *pp;
+  int k, fold;
+
+  if (src[1] != 0 || src[2] != 0 || src[3] != 0) {
+    /* Value above 0xff: pass the four bytes through unchanged. */
+    if (lower != src) {
+      for (k = 0; k < 4; k++)
+        lower[k] = src[k];
+    }
+    (*pp) += 4;
+    return 4; /* return byte length of converted char to lower */
+  }
+
+  /* The only significant byte is the first one. */
+  if (ONIGENC_IS_MBC_ASCII(src))
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
+  else
+    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);
+
+  if (fold)
+    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
+  else
+    lower[0] = *src;
+
+  lower[1] = '\0';
+  lower[2] = '\0';
+  lower[3] = '\0';
+
+  (*pp) += 4;
+  return 4; /* return byte length of converted char to lower */
+}
+
+/* Report whether the character at *pp is case-ambiguous, and advance *pp by
+ * four bytes.  Only characters whose three trailing bytes are zero are
+ * examined, and only when flag enables case matching for their class (ASCII
+ * or non-ASCII); everything else yields FALSE.  end is not consulted.
+ *
+ * The ctype lookup result v is tested directly.  OR-ing it with
+ * ONIGENC_CTYPE_LOWER would make the test succeed for every character
+ * (assuming that constant is a non-zero flag), so digits and punctuation
+ * would be reported as ambiguous. */
+static int
+utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += 4;
+
+  if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
+    int c, v;
+
+    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+         ONIGENC_IS_MBC_ASCII(p)) ||
+        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+         !ONIGENC_IS_MBC_ASCII(p))) {
+      c = *p;
+      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+      if (v == 0)
+        return FALSE; /* lookup matched neither UPPER nor LOWER */
+
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+
+      return TRUE;
+    }
+  }
+
+  return FALSE;
+}
+
+/* Round s down to the start of the four-byte unit that contains it,
+ * counting from start.  A position at or before start is returned as is. */
+static UChar*
+utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  int back;
+
+  if (s > start) {
+    back = (s - start) % 4;
+    s -= back;
+  }
+
+  return (UChar* )s;
+}
+
+/* Encoding descriptor for UTF-32LE: the encoding name, the byte-length
+ * limits, the supported ambiguity flags, the meta-character table and the
+ * callbacks for this encoding.
+ * NOTE(review): the meaning of each unlabeled slot is fixed by the
+ * OnigEncodingType declaration, which is not visible here -- confirm the
+ * order against the header before reordering anything. */
+OnigEncodingType OnigEncodingUTF32_LE = {
+ utf32le_mbc_enc_len,
+ "UTF-32LE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32le_is_mbc_newline,
+ utf32le_mbc_to_code,
+ utf32le_code_to_mbclen,
+ utf32le_code_to_mbc,
+ utf32le_mbc_to_normalize,
+ utf32le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c
new file mode 100644
index 0000000..c7481d7
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf8.c
@@ -0,0 +1,3730 @@
+/**********************************************************************
+ utf8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+/* When defined, the bytes 0xfe and 0xff are given the virtual code point
+ * values below, and the conversion functions in this file map them back to
+ * those single bytes. */
+#define USE_INVALID_CODE_SCHEME
+
+#ifdef USE_INVALID_CODE_SCHEME
+/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
+#define INVALID_CODE_FE 0xfffffffe
+#define INVALID_CODE_FF 0xffffffff
+#define VALID_CODE_LIMIT 0x7fffffff
+#endif
+
+/* True when the byte is not of the form 10xxxxxx (its top two bits are
+ * anything other than binary 10). */
+#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
+
+/* Byte length of a UTF-8 character, indexed by its first byte.  Bytes
+ * 0x00-0xbf map to 1, 0xc0-0xdf to 2, 0xe0-0xef to 3, 0xf0-0xf7 to 4,
+ * 0xf8-0xfb to 5, 0xfc-0xfd to 6, and 0xfe/0xff to 1. */
+static const int EncLen_UTF8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+};
+
+/* Byte length of the UTF-8 character starting at p, looked up from its
+ * first byte. */
+static int
+utf8_mbc_enc_len(const UChar* p)
+{
+  const UChar lead = p[0];
+
+  return EncLen_UTF8[lead];
+}
+
+/* Return 1 when the UTF-8 character at p is a line terminator, else 0.
+ * Only LF (0x0a) is recognized unless USE_UNICODE_ALL_LINE_TERMINATORS is
+ * defined, which adds CR, U+0085, U+2028 and U+2029.  No byte at or past
+ * end is read. */
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p >= end)
+    return 0;
+
+  if (p[0] == 0x0a)
+    return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+  if (p[0] == 0x0d)
+    return 1;
+
+  /* U+0085 is the byte pair c2 85 */
+  if (p + 1 < end && p[0] == 0xc2 && p[1] == 0x85)
+    return 1;
+
+  /* U+2028, U+2029 are e2 80 a8 and e2 80 a9 */
+  if (p + 2 < end && p[0] == 0xe2 && p[1] == 0x80
+      && (p[2] == 0xa8 || p[2] == 0xa9))
+    return 1;
+#endif
+
+  return 0;
+}
+
+/* Decode the UTF-8 character at p into a code point.  The length comes from
+ * enc_len() on the first byte; each following byte contributes its low six
+ * bits.  A one-byte character is returned as is, except that with
+ * USE_INVALID_CODE_SCHEME the bytes 0xfe / 0xff map to INVALID_CODE_FE /
+ * INVALID_CODE_FF.  end is not consulted. */
+static OnigCodePoint
+utf8_mbc_to_code(const UChar* p, const UChar* end)
+{
+  int first, ntrail, k;
+  OnigCodePoint code;
+
+  ntrail = enc_len(ONIG_ENCODING_UTF8, p) - 1;
+  first = p[0];
+
+  if (ntrail < 1) {
+#ifdef USE_INVALID_CODE_SCHEME
+    if (first > 0xfd) {
+      return ((first == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
+    }
+#endif
+    return (OnigCodePoint )first;
+  }
+
+  /* Payload bits of the lead byte: 6 minus the number of trail bytes. */
+  code = first & ((1 << (6 - ntrail)) - 1);
+  for (k = 1; k <= ntrail; k++) {
+    code = (code << 6) | (p[k] & ((1 << 6) - 1));
+  }
+
+  return code;
+}
+
+/* Number of bytes utf8_code_to_mbc() writes for code: 1 to 6 for values up
+ * to 0x7fffffff, 1 for the virtual codes INVALID_CODE_FE / INVALID_CODE_FF
+ * (when USE_INVALID_CODE_SCHEME is defined), and
+ * ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE for anything else.
+ *
+ * Codes 0xfe and 0xff are ordinary two-byte values here, because
+ * utf8_code_to_mbc() emits two bytes for every code in 0x80-0x7ff; reporting
+ * 1 for them would disagree with the number of bytes actually written.
+ * NOTE(review): confirm that no caller relies on a length of 1 for the
+ * codes 0xfe / 0xff. */
+static int
+utf8_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xffffff80) == 0) return 1;
+  else if ((code & 0xfffff800) == 0) return 2;
+  else if ((code & 0xffff0000) == 0) return 3;
+  else if ((code & 0xffe00000) == 0) return 4;
+  else if ((code & 0xfc000000) == 0) return 5;
+  else if ((code & 0x80000000) == 0) return 6;
+#ifdef USE_INVALID_CODE_SCHEME
+  else if (code == INVALID_CODE_FE) return 1;
+  else if (code == INVALID_CODE_FF) return 1;
+#endif
+  else
+    return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+}
+
+/* Compiled out by "#if 0".  Computes only the first byte of the UTF-8
+ * encoding of code (the code itself below 0x80, otherwise the lead byte),
+ * or ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set. */
+#if 0
+static int
+utf8_code_to_mbc_first(OnigCodePoint code)
+{
+ if ((code & 0xffffff80) == 0)
+ return code;
+ else {
+ if ((code & 0xfffff800) == 0)
+ return ((code>>6)& 0x1f) | 0xc0;
+ else if ((code & 0xffff0000) == 0)
+ return ((code>>12) & 0x0f) | 0xe0;
+ else if ((code & 0xffe00000) == 0)
+ return ((code>>18) & 0x07) | 0xf0;
+ else if ((code & 0xfc000000) == 0)
+ return ((code>>24) & 0x03) | 0xf8;
+ else if ((code & 0x80000000) == 0)
+ return ((code>>30) & 0x01) | 0xfc;
+ else {
+ return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+ }
+ }
+}
+#endif
+
+/* Encode code as UTF-8 into buf and return the number of bytes written
+ * (1 to 6).  With USE_INVALID_CODE_SCHEME the virtual codes INVALID_CODE_FE
+ * and INVALID_CODE_FF are written as the single bytes 0xfe and 0xff.  Any
+ * other value with bit 31 set yields ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE and
+ * nothing is written. */
+static int
+utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
+#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
+
+  UChar* out = buf;
+  UChar lead;
+  int ntrail;   /* number of bytes that follow the lead byte */
+
+  if ((code & 0xffffff80) == 0) {
+    buf[0] = (UChar )code;
+    return 1;
+  }
+
+  /* Choose the lead byte and the number of following bytes. */
+  if ((code & 0xfffff800) == 0) {
+    lead = (UChar )(((code>>6)& 0x1f) | 0xc0);
+    ntrail = 1;
+  }
+  else if ((code & 0xffff0000) == 0) {
+    lead = (UChar )(((code>>12) & 0x0f) | 0xe0);
+    ntrail = 2;
+  }
+  else if ((code & 0xffe00000) == 0) {
+    lead = (UChar )(((code>>18) & 0x07) | 0xf0);
+    ntrail = 3;
+  }
+  else if ((code & 0xfc000000) == 0) {
+    lead = (UChar )(((code>>24) & 0x03) | 0xf8);
+    ntrail = 4;
+  }
+  else if ((code & 0x80000000) == 0) {
+    lead = (UChar )(((code>>30) & 0x01) | 0xfc);
+    ntrail = 5;
+  }
+#ifdef USE_INVALID_CODE_SCHEME
+  else if (code == INVALID_CODE_FE) {
+    buf[0] = 0xfe;
+    return 1;
+  }
+  else if (code == INVALID_CODE_FF) {
+    buf[0] = 0xff;
+    return 1;
+  }
+#endif
+  else {
+    return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+  }
+
+  *out++ = lead;
+
+  /* Middle bytes, six bits each, highest group first. */
+  while (ntrail > 1) {
+    ntrail--;
+    *out++ = UTF8_TRAILS(code, 6 * ntrail);
+  }
+
+  /* The last byte carries the lowest six bits. */
+  *out++ = UTF8_TRAIL0(code);
+  return out - buf;
+}
+
+/* Copy the character at *pp into lower, lower-casing it where this function
+ * knows how: ASCII characters when flag has
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, and the two-byte sequences 0xc3
+ * 0x80-0x9e (except 0xc3 0x97) when flag has
+ * ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.  Everything else is copied as is.
+ * *pp is advanced past the character and the number of bytes written to
+ * lower is returned.  end is not consulted. */
+static int
+utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+{
+  const UChar* src = *pp;
+  int nbytes, k, second;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+      *lower = *src;
+    else
+      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+
+    (*pp)++;
+    return 1; /* return byte length of converted char to lower */
+  }
+
+  if (*src == 0xc3) { /* 0xc3 == 195 == '\303' */
+    second = src[1];
+    /* 0x80-0x9e: upper case; 0x97 ('\227') is left alone */
+    if (second >= 0x80 && second <= 0x9e &&
+        (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+        second != 0x97) {
+      lower[0] = *src;
+      lower[1] = (UChar )(second + 32);
+      (*pp) += 2;
+      return 2;
+    }
+  }
+
+  /* No conversion applies: copy the bytes unless already in place. */
+  nbytes = enc_len(ONIG_ENCODING_UTF8, src);
+  if (lower != src) {
+    for (k = 0; k < nbytes; k++)
+      lower[k] = src[k];
+  }
+  (*pp) += nbytes;
+  return nbytes; /* return byte length of converted char to lower */
+}
+
+/* Report whether the character at *pp is case-ambiguous, and advance *pp
+ * past it.  ASCII characters are judged by
+ * ONIGENC_IS_ASCII_CODE_CASE_AMBIG when flag has
+ * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE.  Among non-ASCII characters only the
+ * two-byte sequences with lead byte 0xc3 can yield TRUE, and only when flag
+ * has ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.  end is not consulted. */
+static int
+utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* src = *pp;
+  int second;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    (*pp)++;
+    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) == 0)
+      return FALSE;
+    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*src);
+  }
+
+  (*pp) += enc_len(ONIG_ENCODING_UTF8, src);
+
+  if (*src != 0xc3) /* 0xc3 == 195 == '\303' */
+    return FALSE;
+
+  second = src[1];
+  if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) == 0)
+    return FALSE;
+
+  /* upper: 0x80-0x9e, except 0x97 ('\227') */
+  if (second >= 0x80 && second <= 0x9e)
+    return (second == 0x97) ? FALSE : TRUE;
+
+  /* lower: 0xa0-0xbe, except 0xb7 ('\267') */
+  if (second >= 0xa0 && second <= 0xbe)
+    return (second == 0xb7) ? FALSE : TRUE;
+
+  return FALSE;
+}
+
+
+/* Range table holding the single value 0.  Presumably a range count of
+ * zero, by analogy with the other tables here whose first element is the
+ * number of (from, to) pairs that follow -- confirm against the users. */
+static const OnigCodePoint EmptyRange[] = { 0 };
+
+/* Code point ranges 0-9, A-Z and a-z.  The first element is the number of
+ * inclusive (from, to) pairs that follow. */
+static const OnigCodePoint SBAlnum[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+/* Code point ranges above the ASCII range that accompany SBAlnum.  The
+ * first element is the number of inclusive (from, to) pairs that follow:
+ * 6 pairs by default, 411 when USE_UNICODE_FULL_RANGE_CTYPE is defined.
+ * The pairs are listed in ascending order. */
+static const OnigCodePoint MBAlnum[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 411,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlnum */
+
+static const OnigCodePoint SBAlpha[] = {
+ 2,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+static const OnigCodePoint MBAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 394,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlpha */
+
+static const OnigCodePoint SBBlank[] = {
+ 2,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020
+};
+
+static const OnigCodePoint MBBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 7,
+#else
+ 1,
+#endif
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBBlank */
+
+static const OnigCodePoint SBCntrl[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x007f
+};
+
+static const OnigCodePoint MBCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 18,
+#else
+ 2,
+#endif
+ 0x0080, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBCntrl */
+
+static const OnigCodePoint SBDigit[] = {
+ 1,
+ 0x0030, 0x0039
+};
+
+static const OnigCodePoint MBDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 22,
+#else
+ 0
+#endif
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBDigit */
+
+static const OnigCodePoint SBGraph[] = {
+ 1,
+ 0x0021, 0x007e
+};
+
+static const OnigCodePoint MBGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 404,
+#else
+ 1,
+#endif
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBGraph */
+
+static const OnigCodePoint SBLower[] = {
+ 1,
+ 0x0061, 0x007a
+};
+
+static const OnigCodePoint MBLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 423,
+#else
+ 5,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBLower */
+
+static const OnigCodePoint SBPrint[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x007e
+};
+
+static const OnigCodePoint MBPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 403,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPrint */
+
+static const OnigCodePoint SBPunct[] = { /* punctuation code ranges below 0x80, stored as inclusive (first, last) pairs */
+ 9, /* number of pairs that follow */
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d
+}; /* end of SBPunct; 0x24, 0x2b, 0x3c-0x3e, 0x5e, 0x60, 0x7c and 0x7e are not listed */
+
+static const OnigCodePoint MBPunct[] = { /* punctuation code ranges from 0x00a1 upward, stored as inclusive (first, last) pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 77, /* pair count: the 5 unconditional pairs plus the 72 pairs in the conditional block below */
+#else
+ 5, /* pair count: only the 5 unconditional pairs (0x00a1 .. 0x00bf) */
+#endif
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the unconditional pairs and the full-range pairs */
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPunct */
+
+static const OnigCodePoint SBSpace[] = { /* white-space code ranges below 0x80, stored as inclusive (first, last) pairs */
+ 2, /* number of pairs that follow */
+ 0x0009, 0x000d,
+ 0x0020, 0x0020
+}; /* end of SBSpace */
+
+static const OnigCodePoint MBSpace[] = { /* white-space code ranges from 0x0085 upward, stored as inclusive (first, last) pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9, /* pair count: the 2 unconditional pairs plus the 7 pairs in the conditional block below */
+#else
+ 2, /* pair count: only 0x0085 and 0x00a0 */
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the unconditional pairs and the full-range pairs */
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBSpace */
+
+static const OnigCodePoint SBUpper[] = { /* upper-case code ranges below 0x80, stored as inclusive (first, last) pairs */
+ 1, /* number of pairs that follow */
+ 0x0041, 0x005a
+}; /* end of SBUpper */
+
+static const OnigCodePoint MBUpper[] = { /* upper-case code ranges from 0x00c0 upward, stored as inclusive (first, last) pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 420, /* pair count: the 2 unconditional pairs plus the 418 pairs in the conditional block below */
+#else
+ 2, /* pair count: only 0x00c0-0x00d6 and 0x00d8-0x00de */
+#endif
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ , /* separator between the unconditional pairs and the full-range pairs */
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBUpper */
+
+static const OnigCodePoint SBXDigit[] = { /* hexadecimal-digit code ranges (0-9, A-F, a-f), stored as inclusive (first, last) pairs */
+ 3, /* number of pairs that follow */
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+}; /* end of SBXDigit */
+
+static const OnigCodePoint SBASCII[] = { /* the whole 7-bit range as a single inclusive (first, last) pair */
+ 1, /* number of pairs that follow */
+ 0x0000, 0x007f
+}; /* end of SBASCII */
+
+static const OnigCodePoint SBWord[] = { /* word-character code ranges below 0x80 (digits, A-Z, underscore, a-z), stored as inclusive (first, last) pairs */
+ 4, /* number of pairs that follow */
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a
+}; /* end of SBWord */
+
+static const OnigCodePoint MBWord[] = { /* word-character code ranges from 0x00aa upward, stored as inclusive (first, last) pairs */
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 432, /* pair count: the 7 unconditional pairs plus the 425 pairs of the full-range arm below */
+#else
+ 8, /* pair count: the 7 unconditional pairs plus the single catch-all pair starting at 0x00f8 */
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff /* without the full-range tables every code point from 0x00f8 up is listed as a word character */
+#else /* USE_UNICODE_FULL_RANGE_CTYPE */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBWord */
+
+
+static int /* returns 0 when the tables were stored, ONIGENCERR_TYPE_BUG when ctype matches no case */
+utf8_get_ctype_code_range(int ctype, /* ONIGENC_CTYPE_* value selecting which pair of tables to hand back */
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) /* out: *sbr receives the SB* table, *mbr the MB* table (or EmptyRange) */
+{
+#define CR_SET(sbl,mbl) do { \
+ *sbr = sbl; \
+ *mbr = mbl; \
+} while (0) /* CR_SET: store both tables; the macro is not #undef'd within this function */
+
+#define CR_SB_SET(sbl) do { \
+ *sbr = sbl; \
+ *mbr = EmptyRange; \
+} while (0) /* CR_SB_SET: store the SB table and point *mbr at EmptyRange (defined outside this block) */
+
+ switch (ctype) { /* compared by exact value, not as a bit mask */
+ case ONIGENC_CTYPE_ALPHA:
+ CR_SET(SBAlpha, MBAlpha);
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ CR_SET(SBBlank, MBBlank);
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ CR_SET(SBCntrl, MBCntrl);
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ CR_SET(SBDigit, MBDigit);
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ CR_SET(SBGraph, MBGraph);
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ CR_SET(SBLower, MBLower);
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ CR_SET(SBPrint, MBPrint);
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ CR_SET(SBPunct, MBPunct);
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ CR_SET(SBSpace, MBSpace);
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ CR_SET(SBUpper, MBUpper);
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ CR_SB_SET(SBXDigit); /* no MB table exists for XDIGIT in this file section */
+ break;
+ case ONIGENC_CTYPE_WORD:
+ CR_SET(SBWord, MBWord);
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ CR_SB_SET(SBASCII); /* no MB table exists for ASCII in this file section */
+ break;
+ case ONIGENC_CTYPE_ALNUM:
+ CR_SET(SBAlnum, MBAlnum);
+ break;
+
+ default: /* includes ONIGENC_CTYPE_NEWLINE, which has no case in this switch */
+ return ONIGENCERR_TYPE_BUG; /* *sbr and *mbr are left untouched on this path */
+ break; /* unreachable after the return above */
+ }
+
+ return 0;
+}
+
+static int /* non-zero when code is reported as belonging to ctype, FALSE otherwise; see the note on the default arm */
+utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ const OnigCodePoint *range; /* MB* table chosen by the switch below */
+#endif
+
+ if (code < 256) { /* code points below 256 are answered entirely by the ISO-8859-1 macro */
+ return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+ }
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+
+ switch (ctype) { /* from here on code >= 256; ctype is compared by exact value */
+ case ONIGENC_CTYPE_ALPHA:
+ range = MBAlpha;
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ range = MBBlank;
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ range = MBCntrl;
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ range = MBDigit;
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ range = MBGraph;
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ range = MBLower;
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ range = MBPrint;
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ range = MBPunct;
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ range = MBSpace;
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ range = MBUpper;
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ return FALSE; /* no table is consulted: never XDIGIT for code >= 256 */
+ break; /* unreachable after the return above */
+ case ONIGENC_CTYPE_WORD:
+ range = MBWord;
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ return FALSE; /* no table is consulted: never ASCII for code >= 256 */
+ break; /* unreachable after the return above */
+ case ONIGENC_CTYPE_ALNUM:
+ range = MBAlnum;
+ break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE; /* no table is consulted: never NEWLINE for code >= 256 */
+ break; /* unreachable after the return above */
+
+ default:
+ return ONIGENCERR_TYPE_BUG; /* NOTE(review): an error code returned from a predicate; presumably non-zero, so a caller testing only truthiness would read it as a match - confirm against callers */
+ break; /* unreachable after the return above */
+ }
+
+ return onig_is_in_code_range((UChar* )range, code); /* look code up in the selected table; the table is passed through a UChar* cast */
+
+#else
+
+ if ((ctype & ONIGENC_CTYPE_WORD) != 0) { /* this build tests only the WORD bit of ctype, unlike the exact-value switch in the other arm */
+#ifdef USE_INVALID_CODE_SCHEME
+ if (code <= VALID_CODE_LIMIT) /* when compiled in, this condition guards the return TRUE below */
+#endif
+ return TRUE;
+ }
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+
+ return FALSE; /* reached only in the build without USE_UNICODE_FULL_RANGE_CTYPE; the other arm returns on every path */
+}
+
+static UChar* /* returns s moved back to the nearest byte at or before it that utf8_islead accepts, but never before start */
+utf8_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ const UChar *p; /* scan cursor, walks backwards from s */
+
+ if (s <= start) return (UChar* )s; /* nothing to scan: s is returned unchanged, with const cast away */
+ p = s;
+
+ while (!utf8_islead(*p) && p > start) p--; /* stops at start even if the byte there is not accepted by utf8_islead */
+ return (UChar* )p;
+}
+
+OnigEncodingType OnigEncodingUTF8 = { /* UTF-8 encoding descriptor; positional initializer, so entry order must match OnigEncodingType (declared outside this file section) */
+ utf8_mbc_enc_len,
+ "UTF-8", /* name */
+ 6, /* max byte length */
+ 1, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), /* both ambiguity flags are enabled for this encoding */
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }, /* only the escape entry holds a real character; the other five are ONIG_INEFFECTIVE_META_CHAR */
+ utf8_is_mbc_newline,
+ utf8_mbc_to_code,
+ utf8_code_to_mbclen,
+ utf8_code_to_mbc,
+ utf8_mbc_to_normalize,
+ utf8_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes, /* NOTE(review): not a utf8_* function; presumably a shared ISO-8859-1 helper - confirm */
+ onigenc_ess_tsett_get_all_comp_ambig_codes, /* NOTE(review): not a utf8_* function; presumably a shared helper - confirm */
+ utf8_is_code_ctype, /* ctype membership test defined above */
+ utf8_get_ctype_code_range, /* ctype table lookup defined above */
+ utf8_left_adjust_char_head, /* character-head adjustment defined above */
+ onigenc_always_true_is_allowed_reverse_match
+}; /* end of OnigEncodingUTF8 */