diff options
author | msvensson@neptunus.(none) <> | 2006-10-03 15:56:56 +0200 |
---|---|---|
committer | msvensson@neptunus.(none) <> | 2006-10-03 15:56:56 +0200 |
commit | 237779218c0d2085b28b5f957bd10613d37b7290 (patch) | |
tree | 830353ff5220b0884f043351c0746f315cf272df /strings | |
parent | 48d99634f3462489abd942125e153d923d83d3b5 (diff) | |
parent | 9c53c7ffb13e5af7d9b4ad53b10958fbddb40da6 (diff) | |
download | mariadb-git-237779218c0d2085b28b5f957bd10613d37b7290.tar.gz |
Merge bk-internal:/home/bk/mysql-5.1-new-rpl
into neptunus.(none):/home/msvensson/mysql/mysql-5.1-new-maint
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 1 | ||||
-rw-r--r-- | strings/ctype-bin.c | 1 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 1 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 1 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 1 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 1 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 1 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 1 | ||||
-rw-r--r-- | strings/ctype-simple.c | 337 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 1 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 1 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 30 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 1 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 1 | ||||
-rw-r--r-- | strings/xml.c | 60 |
15 files changed, 430 insertions, 9 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 3f604abde2e..d2dacf2d0a3 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6371,6 +6371,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index e35aee79fd1..5758960ef6c 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -518,6 +518,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index a1c2812c6f6..4b9d09e06b5 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -5493,6 +5493,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 6dc5ccdfe2a..82189d64b6c 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8712,6 +8712,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 0f59cc2b305..4d09bc0e01e 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -8678,6 +8678,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 8d0bc80e695..8c85c0e79d3 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5763,6 +5763,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 7eb332da3bd..20deab9be6c 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -10016,6 +10016,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 95ea87114d6..2c326a2826e 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -412,6 +412,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index a9fd5b8852e..9b45d5a03b7 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -17,6 +17,7 @@ #include <my_global.h> #include "m_string.h" #include "m_ctype.h" +#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */ #include <errno.h> #include "stdarg.h" @@ -1367,6 +1368,341 @@ int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype, } +#undef ULONGLONG_MAX +/* + Needed under MetroWerks Compiler, since MetroWerks compiler does not + properly handle a constant expression containing a mod operator +*/ +#if defined(__NETWARE__) && defined(__MWERKS__) +static ulonglong ulonglong_max= ~(ulonglong) 0; +#define ULONGLONG_MAX ulonglong_max +#else +#define ULONGLONG_MAX (~(ulonglong) 0) +#endif /* __NETWARE__ && __MWERKS__ */ + + +#define CUTOFF (ULONGLONG_MAX / 10) +#define CUTLIM (ULONGLONG_MAX % 10) +#define DIGITS_IN_ULONGLONG 20 + +static ulonglong d10[DIGITS_IN_ULONGLONG]= +{ + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL +}; + + +/* + + Convert a string to unsigned long long integer value + with rounding. + + SYNOPSYS + my_strntoull10_8bit() + cs in pointer to character set + str in pointer to the string to be converted + length in string length + unsigned_flag in whether the number is unsigned + endptr out pointer to the stop character + error out returned error code + + DESCRIPTION + This function takes the decimal representation of integer number + from string str and converts it to an signed or unsigned + long long integer value. + Space characters and tab are ignored. + A sign character might precede the digit characters. + The number may have any number of pre-zero digits. + The number may have decimal point and exponent. + Rounding is always done in "away from zero" style: + 0.5 -> 1 + -0.5 -> -1 + + The function stops reading the string str after "length" bytes + or at the first character that is not a part of correct number syntax: + + <signed numeric literal> ::= + [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ] + + <exact numeric literal> ::= + <unsigned integer> [ <period> [ <unsigned integer> ] ] + | <period> <unsigned integer> + <unsigned integer> ::= <digit>... + + RETURN VALUES + Value of string as a signed/unsigned longlong integer + + endptr cannot be NULL. The function will store the end pointer + to the stop character here. + + The error parameter contains information how things went: + 0 ok + ERANGE If the the value of the converted number is out of range + In this case the return value is: + - ULONGLONG_MAX if unsigned_flag and the number was too big + - 0 if unsigned_flag and the number was negative + - LONGLONG_MAX if no unsigned_flag and the number is too big + - LONGLONG_MIN if no unsigned_flag and the number it too big negative + + EDOM If the string didn't contain any digits. + In this case the return value is 0. +*/ + +ulonglong +my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)), + const char *str, uint length, int unsigned_flag, + char **endptr, int *error) +{ + const char *dot, *end9, *beg, *end= str + length; + ulonglong ull; + ulong ul; + unsigned char ch; + int shift= 0, digits= 0, negative, addon; + + /* Skip leading spaces and tabs */ + for ( ; str < end && (*str == ' ' || *str == '\t') ; str++); + + if (str >= end) + goto ret_edom; + + if ((negative= (*str == '-')) || *str=='+') /* optional sign */ + { + if (++str == end) + goto ret_edom; + } + + beg= str; + end9= (str + 9) > end ? end : (str + 9); + /* Accumulate small number into ulong, for performance purposes */ + for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++) + { + ul= ul * 10 + ch; + } + + if (str >= end) /* Small number without dots and expanents */ + { + *endptr= (char*) str; + if (negative) + { + if (unsigned_flag) + { + *error= ul ? MY_ERRNO_ERANGE : 0; + return 0; + } + else + { + *error= 0; + return (ulonglong) (longlong) (long) -ul; + } + } + else + { + *error=0; + return (ulonglong) ul; + } + } + + digits= str - beg; + + /* Continue to accumulate into ulonglong */ + for (dot= NULL, ull= ul; str < end; str++) + { + if ((ch= (unsigned char) (*str - '0')) < 10) + { + if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM)) + { + ull= ull * 10 + ch; + digits++; + continue; + } + /* + Adding the next digit would overflow. + Remember the next digit in "addon", for rounding. + Scan all digits with an optional single dot. + */ + if (ull == CUTOFF) + { + ull= ULONGLONG_MAX; + addon= 1; + str++; + } + else + addon= (*str >= '5'); + for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++) + { + if (!dot) + shift++; + } + if (str < end && *str == '.' && !dot) + { + str++; + for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++); + } + goto exp; + } + + if (*str == '.') + { + if (dot) + { + /* The second dot character */ + addon= 0; + goto exp; + } + else + { + dot= str + 1; + } + continue; + } + + /* Unknown character, exit the loop */ + break; + } + shift= dot ? dot - str : 0; /* Right shift */ + addon= 0; + +exp: /* [ E [ <sign> ] <unsigned integer> ] */ + + if (!digits) + { + str= beg; + goto ret_edom; + } + + if (str < end && (*str == 'e' || *str == 'E')) + { + str++; + if (str < end) + { + int negative_exp, exp; + if ((negative_exp= (*str == '-')) || *str=='+') + { + if (++str == end) + goto ret_sign; + } + for (exp= 0 ; + str < end && (ch= (unsigned char) (*str - '0')) < 10; + str++) + { + exp= exp * 10 + ch; + } + shift+= negative_exp ? -exp : exp; + } + } + + if (shift == 0) /* No shift, check addon digit */ + { + if (addon) + { + if (ull == ULONGLONG_MAX) + goto ret_too_big; + ull++; + } + goto ret_sign; + } + + if (shift < 0) /* Right shift */ + { + ulonglong d, r; + + if (-shift >= DIGITS_IN_ULONGLONG) + goto ret_zero; /* Exponent is a big negative number, return 0 */ + + d= d10[-shift]; + r= (ull % d) * 2; + ull /= d; + if (r >= d) + ull++; + goto ret_sign; + } + + if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */ + { + if (!ull) + goto ret_sign; + goto ret_too_big; + } + + for ( ; shift > 0; shift--, ull*= 10) /* Left shift */ + { + if (ull > CUTOFF) + goto ret_too_big; /* Overflow, number too big */ + } + +ret_sign: + *endptr= (char*) str; + + if (!unsigned_flag) + { + if (negative) + { + if (ull > (ulonglong) LONGLONG_MIN) + { + *error= MY_ERRNO_ERANGE; + return (ulonglong) LONGLONG_MIN; + } + *error= 0; + return (ulonglong) -ull; + } + else + { + if (ull > (ulonglong) LONGLONG_MAX) + { + *error= MY_ERRNO_ERANGE; + return (ulonglong) LONGLONG_MAX; + } + *error= 0; + return ull; + } + } + + /* Unsigned number */ + if (negative && ull) + { + *error= MY_ERRNO_ERANGE; + return 0; + } + *error= 0; + return ull; + +ret_zero: + *endptr= (char*) str; + *error= 0; + return 0; + +ret_edom: + *endptr= (char*) str; + *error= MY_ERRNO_EDOM; + return 0; + +ret_too_big: + *endptr= (char*) str; + *error= MY_ERRNO_ERANGE; + return unsigned_flag ? + ULONGLONG_MAX : + negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX; +} + + /* Check if a constant can be propagated @@ -1448,6 +1784,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 59156e444c1..0ce085a330e 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4664,6 +4664,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 98cc41dd26f..c5144d28b57 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -892,6 +892,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 2ab2fdc1657..df43eff3d73 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -974,6 +974,35 @@ double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)), } +ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, uint length, int unsign_fl, + char **endptr, int *err) +{ + char buf[256], *b= buf; + ulonglong res; + const uchar *end, *s= (const uchar*) nptr; + my_wc_t wc; + int cnv; + + /* Cut too long strings */ + if (length >= sizeof(buf)) + length= sizeof(buf)-1; + end= s + length; + + while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0) + { + s+= cnv; + if (wc > (int) (uchar) 'e' || !wc) + break; /* Can't be a number part */ + *b++= (char) wc; + } + + res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err); + *endptr= (char*) nptr + 2 * (uint) (*endptr- buf); + return res; +} + + /* This is a fast version optimized for the case of radix 10 / -10 */ @@ -1630,6 +1659,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= my_strntoull_ucs2, my_strntod_ucs2, my_strtoll10_ucs2, + my_strntoull10rnd_ucs2, my_scan_ucs2 }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 06c4540e464..5474377631e 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8546,6 +8546,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 1a952a07042..e221297eb55 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2551,6 +2551,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler= my_strntoull_8bit, my_strntod_8bit, my_strtoll10_8bit, + my_strntoull10rnd_8bit, my_scan_8bit }; diff --git a/strings/xml.c b/strings/xml.c index 51649dcb343..7f7c531d051 100644 --- a/strings/xml.c +++ b/strings/xml.c @@ -19,6 +19,7 @@ #include "my_xml.h" +#define MY_XML_UNKNOWN 'U' #define MY_XML_EOF 'E' #define MY_XML_STRING 'S' #define MY_XML_IDENT 'I' @@ -39,6 +40,46 @@ typedef struct xml_attr_st } MY_XML_ATTR; +/* + XML ctype: +*/ +#define MY_XML_ID0 0x01 /* Identifier initial character */ +#define MY_XML_ID1 0x02 /* Identifier medial character */ +#define MY_XML_SPC 0x08 /* Spacing character */ + + +/* + http://www.w3.org/TR/REC-xml/ + [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + CombiningChar | Extender + [5] Name ::= (Letter | '_' | ':') (NameChar)* +*/ + +static char my_xml_ctype[256]= +{ +/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0, +/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */ +/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */ +/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */ +/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */ +/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */ +/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */ +/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 +}; + +#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC) +#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0) +#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1) + + static const char *lex2str(int lex) { switch(lex) @@ -56,13 +97,13 @@ static const char *lex2str(int lex) case MY_XML_QUESTION: return "'?'"; case MY_XML_EXCLAM: return "'!'"; } - return "UNKNOWN"; + return "unknown token"; } static void my_xml_norm_text(MY_XML_ATTR *a) { - for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ ); - for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- ); + for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ ); + for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- ); } @@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) { int lex; - for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++); + for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++); if (p->cur >= p->end) { @@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) my_xml_norm_text(a); lex=MY_XML_STRING; } - else + else if (my_xml_is_id0(p->cur[0])) { - for(; - (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]); - p->cur++) - {} + p->cur++; + while (p->cur < p->end && my_xml_is_id1(p->cur[0])) + p->cur++; a->end=p->cur; my_xml_norm_text(a); lex=MY_XML_IDENT; } + else + lex= MY_XML_UNKNOWN; #if 0 printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg); |