summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authormsvensson@neptunus.(none) <>2006-10-03 15:56:56 +0200
committermsvensson@neptunus.(none) <>2006-10-03 15:56:56 +0200
commit237779218c0d2085b28b5f957bd10613d37b7290 (patch)
tree830353ff5220b0884f043351c0746f315cf272df /strings
parent48d99634f3462489abd942125e153d923d83d3b5 (diff)
parent9c53c7ffb13e5af7d9b4ad53b10958fbddb40da6 (diff)
downloadmariadb-git-237779218c0d2085b28b5f957bd10613d37b7290.tar.gz
Merge bk-internal:/home/bk/mysql-5.1-new-rpl
into neptunus.(none):/home/msvensson/mysql/mysql-5.1-new-maint
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-big5.c1
-rw-r--r--strings/ctype-bin.c1
-rw-r--r--strings/ctype-cp932.c1
-rw-r--r--strings/ctype-euc_kr.c1
-rw-r--r--strings/ctype-eucjpms.c1
-rw-r--r--strings/ctype-gb2312.c1
-rw-r--r--strings/ctype-gbk.c1
-rw-r--r--strings/ctype-latin1.c1
-rw-r--r--strings/ctype-simple.c337
-rw-r--r--strings/ctype-sjis.c1
-rw-r--r--strings/ctype-tis620.c1
-rw-r--r--strings/ctype-ucs2.c30
-rw-r--r--strings/ctype-ujis.c1
-rw-r--r--strings/ctype-utf8.c1
-rw-r--r--strings/xml.c60
15 files changed, 430 insertions, 9 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 3f604abde2e..d2dacf2d0a3 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6371,6 +6371,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index e35aee79fd1..5758960ef6c 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -518,6 +518,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index a1c2812c6f6..4b9d09e06b5 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -5493,6 +5493,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 6dc5ccdfe2a..82189d64b6c 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8712,6 +8712,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 0f59cc2b305..4d09bc0e01e 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -8678,6 +8678,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 8d0bc80e695..8c85c0e79d3 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5763,6 +5763,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 7eb332da3bd..20deab9be6c 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10016,6 +10016,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 95ea87114d6..2c326a2826e 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -412,6 +412,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index a9fd5b8852e..9b45d5a03b7 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -17,6 +17,7 @@
#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"
+#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
#include <errno.h>
#include "stdarg.h"
@@ -1367,6 +1368,341 @@ int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
}
+#undef ULONGLONG_MAX
+/*
+ Needed under MetroWerks Compiler, since MetroWerks compiler does not
+ properly handle a constant expression containing a mod operator
+*/
+#if defined(__NETWARE__) && defined(__MWERKS__)
+static ulonglong ulonglong_max= ~(ulonglong) 0;
+#define ULONGLONG_MAX ulonglong_max
+#else
+#define ULONGLONG_MAX (~(ulonglong) 0)
+#endif /* __NETWARE__ && __MWERKS__ */
+
+
+#define CUTOFF (ULONGLONG_MAX / 10) /* Above this, multiplying by 10 overflows */
+#define CUTLIM (ULONGLONG_MAX % 10) /* Largest digit that can be appended to CUTOFF */
+#define DIGITS_IN_ULONGLONG 20 /* Number of entries in d10[] */
+
+static ulonglong d10[DIGITS_IN_ULONGLONG]= /* d10[i] is 10 to the power i */
+{
+ 1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000,
+ 1000000000,
+ 10000000000ULL,
+ 100000000000ULL,
+ 1000000000000ULL,
+ 10000000000000ULL,
+ 100000000000000ULL,
+ 1000000000000000ULL,
+ 10000000000000000ULL,
+ 100000000000000000ULL,
+ 1000000000000000000ULL,
+ 10000000000000000000ULL /* 10^19, the last entry (index 19) */
+};
+
+
+/*
+
+ Convert a string to unsigned long long integer value
+ with rounding.
+
+ SYNOPSIS
+ my_strntoull10rnd_8bit()
+ cs in pointer to character set
+ str in pointer to the string to be converted
+ length in string length
+ unsigned_flag in whether the number is unsigned
+ endptr out pointer to the stop character
+ error out returned error code
+
+ DESCRIPTION
+ This function takes the decimal representation of a number
+ from string str and converts it to a signed or unsigned
+ long long integer value.
+ Leading space and tab characters are skipped.
+ A sign character may precede the digit characters.
+ The number may have any number of leading zero digits.
+ The number may have decimal point and exponent.
+ Rounding is always done in "away from zero" style:
+ 0.5 -> 1
+ -0.5 -> -1
+
+ The function stops reading the string str after "length" bytes
+ or at the first character that is not a part of correct number syntax:
+
+ <signed numeric literal> ::=
+ [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
+
+ <exact numeric literal> ::=
+ <unsigned integer> [ <period> [ <unsigned integer> ] ]
+ | <period> <unsigned integer>
+ <unsigned integer> ::= <digit>...
+
+ RETURN VALUES
+ Value of string as a signed/unsigned longlong integer
+
+ endptr cannot be NULL. The function will store the end pointer
+ to the stop character here.
+
+ The error parameter contains information how things went:
+ 0 ok
+ ERANGE If the value of the converted number is out of range.
+ In this case the return value is:
+ - ULONGLONG_MAX if unsigned_flag and the number was too big
+ - 0 if unsigned_flag and the number was negative
+ - LONGLONG_MAX if no unsigned_flag and the number is too big
+ - LONGLONG_MIN if no unsigned_flag and the number is too big negative
+
+ EDOM If the string didn't contain any digits.
+ In this case the return value is 0.
+*/
+
+ulonglong
+my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)),
+ const char *str, uint length, int unsigned_flag,
+ char **endptr, int *error)
+{
+ const char *dot, *end9, *beg, *end= str + length;
+ ulonglong ull;
+ ulong ul;
+ unsigned char ch;
+ int shift= 0, digits= 0, negative, addon; /* shift: power of ten still to be applied to ull */
+
+ /* Skip leading spaces and tabs */
+ for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
+
+ if (str >= end)
+ goto ret_edom;
+
+ if ((negative= (*str == '-')) || *str=='+') /* optional sign */
+ {
+ if (++str == end)
+ goto ret_edom;
+ }
+
+ beg= str;
+ end9= (str + 9) > end ? end : (str + 9); /* ul takes at most 9 digits */
+ /* Accumulate small number into ulong, for performance purposes */
+ for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++)
+ {
+ ul= ul * 10 + ch;
+ }
+
+ if (str >= end) /* Small number without dots and exponents */
+ {
+ *endptr= (char*) str;
+ if (negative)
+ {
+ if (unsigned_flag)
+ {
+ *error= ul ? MY_ERRNO_ERANGE : 0; /* Only minus zero is accepted as unsigned */
+ return 0;
+ }
+ else
+ {
+ *error= 0;
+ return (ulonglong) (longlong) (long) -ul;
+ }
+ }
+ else
+ {
+ *error=0;
+ return (ulonglong) ul;
+ }
+ }
+
+ digits= str - beg; /* Digits read into ul so far */
+
+ /* Continue to accumulate into ulonglong */
+ for (dot= NULL, ull= ul; str < end; str++)
+ {
+ if ((ch= (unsigned char) (*str - '0')) < 10)
+ {
+ if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM)) /* ull * 10 + ch still fits */
+ {
+ ull= ull * 10 + ch;
+ digits++;
+ continue;
+ }
+ /*
+ Adding the next digit would overflow.
+ Set addon to 1 if that digit means the value must be rounded up.
+ Then skip all remaining digits, allowing one dot if none was seen yet.
+ */
+ if (ull == CUTOFF)
+ {
+ ull= ULONGLONG_MAX;
+ addon= 1;
+ str++;
+ }
+ else
+ addon= (*str >= '5');
+ for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++)
+ {
+ if (!dot)
+ shift++; /* Each dropped integer digit is one more power of ten */
+ }
+ if (str < end && *str == '.' && !dot)
+ {
+ str++;
+ for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++);
+ }
+ goto exp; /* NOTE(review): if a dot was already seen, shift is still 0 here, so fraction digits held in ull are not scaled back */
+ }
+
+ if (*str == '.')
+ {
+ if (dot)
+ {
+ /* The second dot character */
+ addon= 0;
+ goto exp; /* NOTE(review): shift is not set from dot on this path, so 1.5.3 yields 15 */
+ }
+ else
+ {
+ dot= str + 1;
+ }
+ continue;
+ }
+
+ /* Unknown character, exit the loop */
+ break;
+ }
+ shift= dot ? dot - str : 0; /* Right shift: minus the number of fraction digits read */
+ addon= 0;
+
+exp: /* [ E [ <sign> ] <unsigned integer> ] */
+
+ if (!digits)
+ {
+ str= beg;
+ goto ret_edom;
+ }
+
+ if (str < end && (*str == 'e' || *str == 'E'))
+ {
+ str++;
+ if (str < end)
+ {
+ int negative_exp, exp;
+ if ((negative_exp= (*str == '-')) || *str=='+')
+ {
+ if (++str == end)
+ goto ret_sign; /* NOTE(review): skips the shift handling below, so 1.5e+ yields 15 */
+ }
+ for (exp= 0 ;
+ str < end && (ch= (unsigned char) (*str - '0')) < 10;
+ str++)
+ {
+ exp= exp * 10 + ch; /* NOTE(review): no overflow guard on the int exponent */
+ }
+ shift+= negative_exp ? -exp : exp;
+ }
+ }
+
+ if (shift == 0) /* No shift, check addon digit */
+ {
+ if (addon)
+ {
+ if (ull == ULONGLONG_MAX)
+ goto ret_too_big;
+ ull++;
+ }
+ goto ret_sign;
+ }
+
+ if (shift < 0) /* Right shift */
+ {
+ ulonglong d, r;
+
+ if (-shift >= DIGITS_IN_ULONGLONG)
+ goto ret_zero; /* Exponent is a big negative number, return 0 */
+
+ d= d10[-shift];
+ r= (ull % d) * 2; /* Doubled remainder; NOTE(review): may wrap when d is 10^19 */
+ ull /= d;
+ if (r >= d) /* Dropped fraction is at least one half: round up */
+ ull++;
+ goto ret_sign;
+ }
+
+ if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
+ {
+ if (!ull) /* Zero stays zero whatever the exponent */
+ goto ret_sign;
+ goto ret_too_big;
+ }
+
+ for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
+ {
+ if (ull > CUTOFF)
+ goto ret_too_big; /* Overflow, number too big */
+ }
+
+ret_sign:
+ *endptr= (char*) str;
+
+ if (!unsigned_flag)
+ {
+ if (negative)
+ {
+ if (ull > (ulonglong) LONGLONG_MIN) /* Magnitude exceeds that of LONGLONG_MIN */
+ {
+ *error= MY_ERRNO_ERANGE;
+ return (ulonglong) LONGLONG_MIN;
+ }
+ *error= 0;
+ return (ulonglong) -ull;
+ }
+ else
+ {
+ if (ull > (ulonglong) LONGLONG_MAX)
+ {
+ *error= MY_ERRNO_ERANGE;
+ return (ulonglong) LONGLONG_MAX;
+ }
+ *error= 0;
+ return ull;
+ }
+ }
+
+ /* Unsigned number */
+ if (negative && ull) /* A negative nonzero value is out of range */
+ {
+ *error= MY_ERRNO_ERANGE;
+ return 0;
+ }
+ *error= 0;
+ return ull;
+
+ret_zero:
+ *endptr= (char*) str;
+ *error= 0;
+ return 0;
+
+ret_edom:
+ *endptr= (char*) str;
+ *error= MY_ERRNO_EDOM;
+ return 0;
+
+ret_too_big:
+ *endptr= (char*) str;
+ *error= MY_ERRNO_ERANGE;
+ return unsigned_flag ?
+ ULONGLONG_MAX :
+ negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
+}
+
+
/*
Check if a constant can be propagated
@@ -1448,6 +1784,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 59156e444c1..0ce085a330e 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4664,6 +4664,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 98cc41dd26f..c5144d28b57 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -892,6 +892,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 2ab2fdc1657..df43eff3d73 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -974,6 +974,35 @@ double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
+ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *nptr, uint length, int unsign_fl,
+ char **endptr, int *err)
+{ /* NOTE(review): cs is dereferenced below although it is tagged unused */
+ char buf[256], *b= buf; /* Narrowed copy that is handed to the 8-bit parser */
+ ulonglong res;
+ const uchar *end, *s= (const uchar*) nptr;
+ my_wc_t wc;
+ int cnv;
+
+ /* Cut too long strings: at most sizeof(buf)-1 input bytes are examined */
+ if (length >= sizeof(buf))
+ length= sizeof(buf)-1;
+ end= s + length;
+
+ while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0) /* Decode one character per iteration */
+ {
+ s+= cnv; /* cnv is used as the number of bytes consumed */
+ if (wc > (int) (uchar) 'e' || !wc)
+ break; /* Cannot be part of a number: code is zero or above that of e */
+ *b++= (char) wc;
+ }
+
+ res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
+ *endptr= (char*) nptr + 2 * (uint) (*endptr- buf); /* Map back into nptr, counting 2 bytes per copied character */
+ return res;
+}
+
+
/*
This is a fast version optimized for the case of radix 10 / -10
*/
@@ -1630,6 +1659,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_strntoull_ucs2,
my_strntod_ucs2,
my_strtoll10_ucs2,
+ my_strntoull10rnd_ucs2,
my_scan_ucs2
};
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 06c4540e464..5474377631e 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8546,6 +8546,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 1a952a07042..e221297eb55 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2551,6 +2551,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_strntoull_8bit,
my_strntod_8bit,
my_strtoll10_8bit,
+ my_strntoull10rnd_8bit,
my_scan_8bit
};
diff --git a/strings/xml.c b/strings/xml.c
index 51649dcb343..7f7c531d051 100644
--- a/strings/xml.c
+++ b/strings/xml.c
@@ -19,6 +19,7 @@
#include "my_xml.h"
+#define MY_XML_UNKNOWN 'U'
#define MY_XML_EOF 'E'
#define MY_XML_STRING 'S'
#define MY_XML_IDENT 'I'
@@ -39,6 +40,46 @@ typedef struct xml_attr_st
} MY_XML_ATTR;
+/*
+ XML ctype:
+*/
+#define MY_XML_ID0 0x01 /* Identifier initial character */
+#define MY_XML_ID1 0x02 /* Identifier medial character */
+#define MY_XML_SPC 0x08 /* Spacing character */
+
+
+/*
+ http://www.w3.org/TR/REC-xml/
+ [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
+ CombiningChar | Extender
+ [5] Name ::= (Letter | '_' | ':') (NameChar)*
+*/
+
+static char my_xml_ctype[256]= /* Per-byte flags: MY_XML_ID0, MY_XML_ID1, MY_XML_SPC */
+{
+/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0, /* 0x09, 0x0A and 0x0D are spacing */
+/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
+/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
+/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
+/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
+/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
+/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
+/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0x80-0xFF: both ID0 and ID1 are set */
+/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
+};
+
+#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC) /* Nonzero if c is flagged as spacing */
+#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0) /* Nonzero if c may start an identifier */
+#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1) /* Nonzero if c may continue an identifier */
+
+
static const char *lex2str(int lex)
{
switch(lex)
@@ -56,13 +97,13 @@ static const char *lex2str(int lex)
case MY_XML_QUESTION: return "'?'";
case MY_XML_EXCLAM: return "'!'";
}
- return "UNKNOWN";
+ return "unknown token";
}
static void my_xml_norm_text(MY_XML_ATTR *a)
{
- for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ );
- for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- );
+ for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
+ for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
}
@@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
int lex;
- for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++);
+ for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
if (p->cur >= p->end)
{
@@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
my_xml_norm_text(a);
lex=MY_XML_STRING;
}
- else
+ else if (my_xml_is_id0(p->cur[0]))
{
- for(;
- (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
- p->cur++)
- {}
+ p->cur++;
+ while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
+ p->cur++;
a->end=p->cur;
my_xml_norm_text(a);
lex=MY_XML_IDENT;
}
+ else
+ lex= MY_XML_UNKNOWN;
#if 0
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);