summary refs log tree commit diff
path: root/ext/mbstring/oniguruma/enc/big5.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma/enc/big5.c')
-rw-r--r-- ext/mbstring/oniguruma/enc/big5.c 138
1 files changed, 92 insertions, 46 deletions
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
index 8aad7f5354..763872e963 100644
--- a/ext/mbstring/oniguruma/enc/big5.c
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -1,14 +1,61 @@
/**********************************************************************
-
big5.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_BIG5[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+big5_mbc_enc_len(const UChar* p)
+{
+ return EncLen_BIG5[*p];
+}
+
static OnigCodePoint
-big5_mbc_to_code(UChar* p, UChar* end)
+big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
@@ -20,15 +67,23 @@ big5_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-big5_mbc_to_lower(UChar* p, UChar* lower)
+big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
+ pp, end, lower);
+}
+
+static int
+big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_BIG5, p, lower);
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
}
static int
-big5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- return onigenc_mb2_code_is_ctype(ONIG_ENCODING_BIG5, code, ctype);
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
}
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
@@ -50,16 +105,16 @@ static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
-#define BIG5_ISMB_FIRST(byte) (OnigEncodingBIG5.len_table[byte] > 1)
+#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1)
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]
static UChar*
-big5_left_adjust_char_head(UChar* start, UChar* s)
+big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
if (BIG5_ISMB_TRAIL(*p)) {
@@ -70,53 +125,44 @@ big5_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
- len = enc_len(ONIG_ENCODING_BIG5, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_BIG5, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-big5_is_allowed_reverse_match(UChar* s, UChar* end)
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
}
OnigEncodingType OnigEncodingBIG5 = {
+ big5_mbc_enc_len,
+ "Big5", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "Big5", /* name */
- 2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
- big5_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- big5_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ big5_mbc_to_normalize,
+ big5_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ big5_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
- big5_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ big5_is_allowed_reverse_match
};