summary | refs | log | tree | commit | diff
path: root/sjis.c
diff options
context:
space:
mode:
author: ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-11-04 14:31:26 +0000
committer: ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-11-04 14:31:26 +0000
commit: 5e853c811ce1d6d6edc187e580a14133667e1058 (patch)
tree: 4ecf2cb00a79a481ee5aeda802d5bb73415ca8f5 /sjis.c
parent: 67ae0fb9aced8cf56de10a1fd400a236bd753b60 (diff)
download: ruby-5e853c811ce1d6d6edc187e580a14133667e1058.tar.gz
This commit was generated by cvs2svn to compensate for changes in r7203,
which included commits to RCS files with non-trunk default branches.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7204 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'sjis.c')
-rw-r--r-- sjis.c 158
1 files changed, 113 insertions, 45 deletions
diff --git a/sjis.c b/sjis.c
index 8485910e69..f1256c4460 100644
--- a/sjis.c
+++ b/sjis.c
@@ -1,12 +1,53 @@
/**********************************************************************
-
sjis.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_SJIS[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
+};
+
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -26,17 +67,39 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
-#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
+#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
+static int
+sjis_mbc_enc_len(UChar* p)
+{
+ return EncLen_SJIS[*p];
+}
+
+extern int
+sjis_code_to_mbclen(OnigCodePoint code)
+{
+ if (code < 256) {
+ if (EncLen_SJIS[(int )code] == 1)
+ return 1;
+ else
+ return 0;
+ }
+ else if (code <= 0xffff) {
+ return 2;
+ }
+ else
+ return 0;
+}
+
static OnigCodePoint
sjis_mbc_to_code(UChar* p, UChar* end)
{
int c, i, len;
OnigCodePoint n;
+ len = enc_len(ONIG_ENCODING_SJIS, p);
c = *p++;
- len = enc_len(ONIG_ENCODING_SJIS, c);
n = c;
if (len == 1) return n;
@@ -57,43 +120,57 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 0
- if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
+ if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))
return REGERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
static int
-sjis_mbc_to_lower(UChar* p, UChar* lower)
+sjis_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower)
{
- int len;
+ UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp)++;
return 1;
}
else {
- len = enc_len(ONIG_ENCODING_SJIS, *p);
+ int len = enc_len(ONIG_ENCODING_SJIS, p);
+
if (lower != p) {
- /* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
+ (*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static int
-sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+sjis_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
+
+}
+
+static int
+sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
- int first = onigenc_mb2_code_to_mbc_first(code);
- return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
+ return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
ctype &= ~ONIGENC_CTYPE_WORD;
@@ -123,7 +200,7 @@ sjis_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
- len = enc_len(ONIG_ENCODING_SJIS, *p);
+ len = enc_len(ONIG_ENCODING_SJIS, p);
if (p + len > s) return p;
p += len;
return p + ((s - p) & ~1);
@@ -137,38 +214,29 @@ sjis_is_allowed_reverse_match(UChar* s, UChar* end)
}
OnigEncodingType OnigEncodingSJIS = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
- },
+ sjis_mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
sjis_mbc_to_code,
- onigenc_mb2_code_to_mbclen,
+ sjis_code_to_mbclen,
sjis_code_to_mbc,
- sjis_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- sjis_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ sjis_mbc_to_normalize,
+ sjis_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ sjis_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
sjis_left_adjust_char_head,
- sjis_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ sjis_is_allowed_reverse_match
};