summaryrefslogtreecommitdiff
path: root/gnulib/lib/unistr
diff options
context:
space:
mode:
authorRichard Maw <richard.maw@codethink.co.uk>2012-01-17 14:43:55 +0000
committerRichard Maw <richard.maw@codethink.co.uk>2012-01-17 14:43:55 +0000
commit2de9abc5c9d40b3c716307d67d16146f823fd554 (patch)
tree6979db67934ddc8b564150b465846a383b428ff8 /gnulib/lib/unistr
parent33cc1c6fda6e72a7bae1401e9b2cec495a4d3ff1 (diff)
downloadpatch-baserock/bootstrap-pass2.tar.gz
Diffstat (limited to 'gnulib/lib/unistr')
m---------gnulib0
-rw-r--r--gnulib/lib/unistr/u-cmp2.h32
-rw-r--r--gnulib/lib/unistr/u-cpy-alloc.h40
-rw-r--r--gnulib/lib/unistr/u-cpy.h32
-rw-r--r--gnulib/lib/unistr/u-endswith.h28
-rw-r--r--gnulib/lib/unistr/u-move.h44
-rw-r--r--gnulib/lib/unistr/u-set.h39
-rw-r--r--gnulib/lib/unistr/u-startswith.h30
-rw-r--r--gnulib/lib/unistr/u-stpcpy.h24
-rw-r--r--gnulib/lib/unistr/u-stpncpy.h34
-rw-r--r--gnulib/lib/unistr/u-strcat.h26
-rw-r--r--gnulib/lib/unistr/u-strcoll.h92
-rw-r--r--gnulib/lib/unistr/u-strcpy.h26
-rw-r--r--gnulib/lib/unistr/u-strcspn.h54
-rw-r--r--gnulib/lib/unistr/u-strdup.h41
-rw-r--r--gnulib/lib/unistr/u-strlen.h26
-rw-r--r--gnulib/lib/unistr/u-strncat.h28
-rw-r--r--gnulib/lib/unistr/u-strncpy.h32
-rw-r--r--gnulib/lib/unistr/u-strnlen.h26
-rw-r--r--gnulib/lib/unistr/u-strpbrk.h46
-rw-r--r--gnulib/lib/unistr/u-strspn.h54
-rw-r--r--gnulib/lib/unistr/u-strstr.h131
-rw-r--r--gnulib/lib/unistr/u-strtok.h52
-rw-r--r--gnulib/lib/unistr/u16-check.c51
-rw-r--r--gnulib/lib/unistr/u16-chr.c57
-rw-r--r--gnulib/lib/unistr/u16-cmp.c54
-rw-r--r--gnulib/lib/unistr/u16-cmp2.c28
-rw-r--r--gnulib/lib/unistr/u16-cpy-alloc.c25
-rw-r--r--gnulib/lib/unistr/u16-cpy.c25
-rw-r--r--gnulib/lib/unistr/u16-endswith.c27
-rw-r--r--gnulib/lib/unistr/u16-mblen.c50
-rw-r--r--gnulib/lib/unistr/u16-mbsnlen.c42
-rw-r--r--gnulib/lib/unistr/u16-mbtouc-aux.c51
-rw-r--r--gnulib/lib/unistr/u16-mbtouc-unsafe-aux.c55
-rw-r--r--gnulib/lib/unistr/u16-mbtouc-unsafe.c66
-rw-r--r--gnulib/lib/unistr/u16-mbtouc.c61
-rw-r--r--gnulib/lib/unistr/u16-mbtoucr.c54
-rw-r--r--gnulib/lib/unistr/u16-move.c25
-rw-r--r--gnulib/lib/unistr/u16-next.c37
-rw-r--r--gnulib/lib/unistr/u16-prev.c53
-rw-r--r--gnulib/lib/unistr/u16-set.c26
-rw-r--r--gnulib/lib/unistr/u16-startswith.c25
-rw-r--r--gnulib/lib/unistr/u16-stpcpy.c26
-rw-r--r--gnulib/lib/unistr/u16-stpncpy.c25
-rw-r--r--gnulib/lib/unistr/u16-strcat.c26
-rw-r--r--gnulib/lib/unistr/u16-strchr.c64
-rw-r--r--gnulib/lib/unistr/u16-strcmp.c50
-rw-r--r--gnulib/lib/unistr/u16-strcoll.c33
-rw-r--r--gnulib/lib/unistr/u16-strcpy.c25
-rw-r--r--gnulib/lib/unistr/u16-strcspn.c28
-rw-r--r--gnulib/lib/unistr/u16-strdup.c26
-rw-r--r--gnulib/lib/unistr/u16-strlen.c25
-rw-r--r--gnulib/lib/unistr/u16-strmblen.c44
-rw-r--r--gnulib/lib/unistr/u16-strmbtouc.c51
-rw-r--r--gnulib/lib/unistr/u16-strncat.c26
-rw-r--r--gnulib/lib/unistr/u16-strncmp.c54
-rw-r--r--gnulib/lib/unistr/u16-strncpy.c25
-rw-r--r--gnulib/lib/unistr/u16-strnlen.c25
-rw-r--r--gnulib/lib/unistr/u16-strpbrk.c27
-rw-r--r--gnulib/lib/unistr/u16-strrchr.c65
-rw-r--r--gnulib/lib/unistr/u16-strspn.c29
-rw-r--r--gnulib/lib/unistr/u16-strstr.c37
-rw-r--r--gnulib/lib/unistr/u16-strtok.c27
-rw-r--r--gnulib/lib/unistr/u16-to-u32.c125
-rw-r--r--gnulib/lib/unistr/u16-to-u8.c136
-rw-r--r--gnulib/lib/unistr/u16-uctomb-aux.c58
-rw-r--r--gnulib/lib/unistr/u16-uctomb.c72
-rw-r--r--gnulib/lib/unistr/u32-check.c39
-rw-r--r--gnulib/lib/unistr/u32-chr.c32
-rw-r--r--gnulib/lib/unistr/u32-cmp.c40
-rw-r--r--gnulib/lib/unistr/u32-cmp2.c28
-rw-r--r--gnulib/lib/unistr/u32-cpy-alloc.c25
-rw-r--r--gnulib/lib/unistr/u32-cpy.c25
-rw-r--r--gnulib/lib/unistr/u32-endswith.c27
-rw-r--r--gnulib/lib/unistr/u32-mblen.c37
-rw-r--r--gnulib/lib/unistr/u32-mbsnlen.c27
-rw-r--r--gnulib/lib/unistr/u32-mbtouc-unsafe.c48
-rw-r--r--gnulib/lib/unistr/u32-mbtouc.c43
-rw-r--r--gnulib/lib/unistr/u32-mbtoucr.c39
-rw-r--r--gnulib/lib/unistr/u32-move.c25
-rw-r--r--gnulib/lib/unistr/u32-next.c39
-rw-r--r--gnulib/lib/unistr/u32-prev.c39
-rw-r--r--gnulib/lib/unistr/u32-set.c26
-rw-r--r--gnulib/lib/unistr/u32-startswith.c25
-rw-r--r--gnulib/lib/unistr/u32-stpcpy.c25
-rw-r--r--gnulib/lib/unistr/u32-stpncpy.c25
-rw-r--r--gnulib/lib/unistr/u32-strcat.c26
-rw-r--r--gnulib/lib/unistr/u32-strchr.c36
-rw-r--r--gnulib/lib/unistr/u32-strcmp.c36
-rw-r--r--gnulib/lib/unistr/u32-strcoll.c33
-rw-r--r--gnulib/lib/unistr/u32-strcpy.c25
-rw-r--r--gnulib/lib/unistr/u32-strcspn.c51
-rw-r--r--gnulib/lib/unistr/u32-strdup.c26
-rw-r--r--gnulib/lib/unistr/u32-strlen.c25
-rw-r--r--gnulib/lib/unistr/u32-strmblen.c36
-rw-r--r--gnulib/lib/unistr/u32-strmbtouc.c39
-rw-r--r--gnulib/lib/unistr/u32-strncat.c26
-rw-r--r--gnulib/lib/unistr/u32-strncmp.c40
-rw-r--r--gnulib/lib/unistr/u32-strncpy.c25
-rw-r--r--gnulib/lib/unistr/u32-strnlen.c25
-rw-r--r--gnulib/lib/unistr/u32-strpbrk.c50
-rw-r--r--gnulib/lib/unistr/u32-strrchr.c38
-rw-r--r--gnulib/lib/unistr/u32-strspn.c50
-rw-r--r--gnulib/lib/unistr/u32-strstr.c34
-rw-r--r--gnulib/lib/unistr/u32-strtok.c27
-rw-r--r--gnulib/lib/unistr/u32-to-u16.c130
-rw-r--r--gnulib/lib/unistr/u32-to-u8.c130
-rw-r--r--gnulib/lib/unistr/u32-uctomb.c47
-rw-r--r--gnulib/lib/unistr/u8-check.c105
-rw-r--r--gnulib/lib/unistr/u8-chr.c201
-rw-r--r--gnulib/lib/unistr/u8-cmp.c30
-rw-r--r--gnulib/lib/unistr/u8-cmp2.c28
-rw-r--r--gnulib/lib/unistr/u8-cpy-alloc.c25
-rw-r--r--gnulib/lib/unistr/u8-cpy.c25
-rw-r--r--gnulib/lib/unistr/u8-endswith.c27
-rw-r--r--gnulib/lib/unistr/u8-mblen.c99
-rw-r--r--gnulib/lib/unistr/u8-mbsnlen.c44
-rw-r--r--gnulib/lib/unistr/u8-mbtouc-aux.c240
-rw-r--r--gnulib/lib/unistr/u8-mbtouc-unsafe-aux.c260
-rw-r--r--gnulib/lib/unistr/u8-mbtouc-unsafe.c271
-rw-r--r--gnulib/lib/unistr/u8-mbtouc.c250
-rw-r--r--gnulib/lib/unistr/u8-mbtoucr.c285
-rw-r--r--gnulib/lib/unistr/u8-move.c25
-rw-r--r--gnulib/lib/unistr/u8-next.c37
-rw-r--r--gnulib/lib/unistr/u8-prev.c93
-rw-r--r--gnulib/lib/unistr/u8-set.c44
-rw-r--r--gnulib/lib/unistr/u8-startswith.c25
-rw-r--r--gnulib/lib/unistr/u8-stpcpy.c44
-rw-r--r--gnulib/lib/unistr/u8-stpncpy.c44
-rw-r--r--gnulib/lib/unistr/u8-strcat.c29
-rw-r--r--gnulib/lib/unistr/u8-strchr.c240
-rw-r--r--gnulib/lib/unistr/u8-strcmp.c30
-rw-r--r--gnulib/lib/unistr/u8-strcoll.c33
-rw-r--r--gnulib/lib/unistr/u8-strcpy.c29
-rw-r--r--gnulib/lib/unistr/u8-strcspn.c28
-rw-r--r--gnulib/lib/unistr/u8-strdup.c40
-rw-r--r--gnulib/lib/unistr/u8-strlen.c29
-rw-r--r--gnulib/lib/unistr/u8-strmblen.c97
-rw-r--r--gnulib/lib/unistr/u8-strmbtouc.c130
-rw-r--r--gnulib/lib/unistr/u8-strncat.c29
-rw-r--r--gnulib/lib/unistr/u8-strncmp.c30
-rw-r--r--gnulib/lib/unistr/u8-strncpy.c29
-rw-r--r--gnulib/lib/unistr/u8-strnlen.c44
-rw-r--r--gnulib/lib/unistr/u8-strpbrk.c27
-rw-r--r--gnulib/lib/unistr/u8-strrchr.c102
-rw-r--r--gnulib/lib/unistr/u8-strspn.c29
-rw-r--r--gnulib/lib/unistr/u8-strstr.c32
-rw-r--r--gnulib/lib/unistr/u8-strtok.c27
-rw-r--r--gnulib/lib/unistr/u8-to-u16.c136
-rw-r--r--gnulib/lib/unistr/u8-to-u32.c125
-rw-r--r--gnulib/lib/unistr/u8-uctomb-aux.c69
-rw-r--r--gnulib/lib/unistr/u8-uctomb.c88
152 files changed, 8101 insertions, 0 deletions
diff --git a/gnulib b/gnulib
deleted file mode 160000
-Subproject 443bc5ffcf7429e557f4a371b0661abe98ddbc1
diff --git a/gnulib/lib/unistr/u-cmp2.h b/gnulib/lib/unistr/u-cmp2.h
new file mode 100644
index 0000000..27ace69
--- /dev/null
+++ b/gnulib/lib/unistr/u-cmp2.h
@@ -0,0 +1,32 @@
+/* Compare pieces of UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+int
+FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2)
+{
+ int cmp = U_CMP (s1, s2, MIN (n1, n2));
+
+ if (cmp == 0)
+ {
+ if (n1 < n2)
+ cmp = -1;
+ else if (n1 > n2)
+ cmp = 1;
+ }
+
+ return cmp;
+}
diff --git a/gnulib/lib/unistr/u-cpy-alloc.h b/gnulib/lib/unistr/u-cpy-alloc.h
new file mode 100644
index 0000000..07b74ff
--- /dev/null
+++ b/gnulib/lib/unistr/u-cpy-alloc.h
@@ -0,0 +1,40 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+
+UNIT *
+FUNC (const UNIT *s, size_t n)
+{
+ UNIT *dest;
+
+ dest = (UNIT *) malloc (n > 0 ? n * sizeof (UNIT) : 1);
+ if (dest != NULL)
+ {
+#if 0
+ UNIT *destptr = dest;
+
+ for (; n > 0; n--)
+ *destptr++ = *s++;
+#else
+ memcpy ((char *) dest, (const char *) s, n * sizeof (UNIT));
+#endif
+ }
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-cpy.h b/gnulib/lib/unistr/u-cpy.h
new file mode 100644
index 0000000..0c2bb10
--- /dev/null
+++ b/gnulib/lib/unistr/u-cpy.h
@@ -0,0 +1,32 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+#if 0
+ UNIT *destptr = dest;
+
+ for (; n > 0; n--)
+ *destptr++ = *src++;
+#else
+ memcpy ((char *) dest, (const char *) src, n * sizeof (UNIT));
+#endif
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-endswith.h b/gnulib/lib/unistr/u-endswith.h
new file mode 100644
index 0000000..5a6d53f
--- /dev/null
+++ b/gnulib/lib/unistr/u-endswith.h
@@ -0,0 +1,28 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+bool
+FUNC (const UNIT *str, const UNIT *suffix)
+{
+ size_t len = U_STRLEN (str);
+ size_t suffixlen = U_STRLEN (suffix);
+
+ if (len >= suffixlen)
+ return (U_CMP (str + (len - suffixlen), suffix, suffixlen) == 0);
+ else
+ return false;
+}
diff --git a/gnulib/lib/unistr/u-move.h b/gnulib/lib/unistr/u-move.h
new file mode 100644
index 0000000..ff0eee6
--- /dev/null
+++ b/gnulib/lib/unistr/u-move.h
@@ -0,0 +1,44 @@
+/* Copy piece of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+#if 0
+ if (dest < src)
+ {
+ UNIT *destptr = dest;
+ const UNIT *srcptr = src;
+
+ for (; n > 0; n--)
+ *destptr++ = *srcptr++;
+ }
+ else if (dest > src)
+ {
+ UNIT *destptr = dest + n - 1;
+ const UNIT *srcptr = src + n - 1;
+
+ for (; n > 0; n--)
+ *destptr-- = *srcptr--;
+ }
+#else
+ memmove ((char *) dest, (const char *) src, n * sizeof (UNIT));
+#endif
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-set.h b/gnulib/lib/unistr/u-set.h
new file mode 100644
index 0000000..42cf01d
--- /dev/null
+++ b/gnulib/lib/unistr/u-set.h
@@ -0,0 +1,39 @@
+/* Fill UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+
+UNIT *
+FUNC (UNIT *s, ucs4_t uc, size_t n)
+{
+ if (n > 0)
+ {
+ if (IS_SINGLE_UNIT (uc))
+ {
+ UNIT *ptr = s;
+
+ for (; n > 0; n--)
+ *ptr++ = uc;
+ }
+ else
+ {
+ errno = EILSEQ;
+ return NULL;
+ }
+ }
+ return s;
+}
diff --git a/gnulib/lib/unistr/u-startswith.h b/gnulib/lib/unistr/u-startswith.h
new file mode 100644
index 0000000..6f77a04
--- /dev/null
+++ b/gnulib/lib/unistr/u-startswith.h
@@ -0,0 +1,30 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+bool
+FUNC (const UNIT *str, const UNIT *prefix)
+{
+ for (;;)
+ {
+ UNIT uc1 = *str++;
+ UNIT uc2 = *prefix++;
+ if (uc2 == 0)
+ return true;
+ if (uc1 != uc2)
+ return false;
+ }
+}
diff --git a/gnulib/lib/unistr/u-stpcpy.h b/gnulib/lib/unistr/u-stpcpy.h
new file mode 100644
index 0000000..1755f4c
--- /dev/null
+++ b/gnulib/lib/unistr/u-stpcpy.h
@@ -0,0 +1,24 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+ for (; (*dest = *src) != 0; src++, dest++)
+ ;
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-stpncpy.h b/gnulib/lib/unistr/u-stpncpy.h
new file mode 100644
index 0000000..408a5dc
--- /dev/null
+++ b/gnulib/lib/unistr/u-stpncpy.h
@@ -0,0 +1,34 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+ for (; n > 0 && (*dest = *src) != 0; src++, dest++, n--)
+ ;
+
+ /* This behavior is rarely useful, but it is here for consistency with
+ strncpy and wcsncpy. */
+ {
+ UNIT *destptr = dest;
+
+ for (; n > 0; n--)
+ *destptr++ = 0;
+ }
+
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strcat.h b/gnulib/lib/unistr/u-strcat.h
new file mode 100644
index 0000000..a993110
--- /dev/null
+++ b/gnulib/lib/unistr/u-strcat.h
@@ -0,0 +1,26 @@
+/* Concatenate UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+ UNIT *destptr = dest + U_STRLEN (dest);
+
+ for (; (*destptr = *src) != 0; src++, destptr++)
+ ;
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strcoll.h b/gnulib/lib/unistr/u-strcoll.h
new file mode 100644
index 0000000..2a1b553
--- /dev/null
+++ b/gnulib/lib/unistr/u-strcoll.h
@@ -0,0 +1,92 @@
+/* Compare UTF-8/UTF-16/UTF-32 strings using the collation rules of the current
+ locale.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+int
+FUNC (const UNIT *s1, const UNIT *s2)
+{
+ /* When this function succeeds, it sets errno back to its original value.
+ When it fails, it sets errno, but also returns a meaningful return value,
+ for the sake of callers which ignore errno. */
+ int final_errno = errno;
+ const char *encoding = locale_charset ();
+ char *sl1;
+ char *sl2;
+ int result;
+
+ /* Pass iconveh_error here, not iconveh_question_mark. Otherwise the
+ conversion to locale encoding can do transliteration or map some
+ characters to question marks, leading to results that depend on the
+ iconv() implementation and are not obvious. */
+ sl1 = U_STRCONV_TO_ENCODING (s1, encoding, iconveh_error);
+ if (sl1 != NULL)
+ {
+ sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error);
+ if (sl2 != NULL)
+ {
+ /* Compare sl1 and sl2. */
+ errno = 0;
+ result = strcoll (sl1, sl2);
+ if (errno == 0)
+ {
+ /* strcoll succeeded. */
+ free (sl1);
+ free (sl2);
+ /* The conversion to locale encoding can drop Unicode TAG
+ characters. Therefore sl1 and sl2 may be equal when s1
+ and s2 were in fact different. Return a nonzero result
+ in this case. */
+ if (result == 0)
+ result = U_STRCMP (s1, s2);
+ }
+ else
+ {
+ /* strcoll failed. */
+ final_errno = errno;
+ free (sl1);
+ free (sl2);
+ result = U_STRCMP (s1, s2);
+ }
+ }
+ else
+ {
+ /* s1 could be converted to locale encoding, s2 not. */
+ final_errno = errno;
+ free (sl1);
+ result = -1;
+ }
+ }
+ else
+ {
+ final_errno = errno;
+ sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error);
+ if (sl2 != NULL)
+ {
+ /* s2 could be converted to locale encoding, s1 not. */
+ free (sl2);
+ result = 1;
+ }
+ else
+ {
+ /* Neither s1 nor s2 could be converted to locale encoding. */
+ result = U_STRCMP (s1, s2);
+ }
+ }
+
+ errno = final_errno;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u-strcpy.h b/gnulib/lib/unistr/u-strcpy.h
new file mode 100644
index 0000000..493fcdb
--- /dev/null
+++ b/gnulib/lib/unistr/u-strcpy.h
@@ -0,0 +1,26 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src)
+{
+ UNIT *destptr = dest;
+
+ for (; (*destptr = *src) != 0; src++, destptr++)
+ ;
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strcspn.h b/gnulib/lib/unistr/u-strcspn.h
new file mode 100644
index 0000000..d55294a
--- /dev/null
+++ b/gnulib/lib/unistr/u-strcspn.h
@@ -0,0 +1,54 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+size_t
+FUNC (const UNIT *str, const UNIT *reject)
+{
+ /* Optimize two cases. */
+ if (reject[0] == 0)
+ return U_STRLEN (str);
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, reject);
+ if (count >= 0 && reject[count] == 0)
+ {
+ const UNIT *found = U_STRCHR (str, uc);
+ if (found != NULL)
+ return found - str;
+ else
+ return U_STRLEN (str);
+ }
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+
+ for (;;)
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, ptr);
+ if (count == 0)
+ return ptr - str;
+ if (count < 0)
+ break;
+ if (U_STRCHR (reject, uc))
+ return ptr - str;
+ ptr += count;
+ }
+ return U_STRLEN (str);
+ }
+}
diff --git a/gnulib/lib/unistr/u-strdup.h b/gnulib/lib/unistr/u-strdup.h
new file mode 100644
index 0000000..ecfcc7b
--- /dev/null
+++ b/gnulib/lib/unistr/u-strdup.h
@@ -0,0 +1,41 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+
+UNIT *
+FUNC (const UNIT *s)
+{
+ size_t n = U_STRLEN (s) + 1;
+ UNIT *dest;
+
+ dest = (UNIT *) malloc (n * sizeof (UNIT));
+ if (dest != NULL)
+ {
+#if 0
+ UNIT *destptr = dest;
+
+ for (; n > 0; n--)
+ *destptr++ = *s++;
+#else
+ memcpy ((char *) dest, (const char *) s, n * sizeof (UNIT));
+#endif
+ }
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strlen.h b/gnulib/lib/unistr/u-strlen.h
new file mode 100644
index 0000000..69ed336
--- /dev/null
+++ b/gnulib/lib/unistr/u-strlen.h
@@ -0,0 +1,26 @@
+/* Determine length of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+size_t
+FUNC (const UNIT *s)
+{
+ const UNIT *ptr;
+
+ for (ptr = s; *ptr != 0; ptr++)
+ ;
+ return ptr - s;
+}
diff --git a/gnulib/lib/unistr/u-strncat.h b/gnulib/lib/unistr/u-strncat.h
new file mode 100644
index 0000000..2b876db
--- /dev/null
+++ b/gnulib/lib/unistr/u-strncat.h
@@ -0,0 +1,28 @@
+/* Concatenate UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+ UNIT *destptr = dest + U_STRLEN (dest);
+
+ for (; n > 0 && (*destptr = *src) != 0; src++, destptr++, n--)
+ ;
+ if (n == 0)
+ *destptr = 0;
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strncpy.h b/gnulib/lib/unistr/u-strncpy.h
new file mode 100644
index 0000000..8e49f04
--- /dev/null
+++ b/gnulib/lib/unistr/u-strncpy.h
@@ -0,0 +1,32 @@
+/* Copy UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *dest, const UNIT *src, size_t n)
+{
+ UNIT *destptr = dest;
+
+ for (; n > 0 && (*destptr = *src) != 0; src++, destptr++, n--)
+ ;
+
+ /* This behavior is rarely useful, but it is here for consistency with
+ strncpy and wcsncpy. */
+ for (; n > 0; n--)
+ *destptr++ = 0;
+
+ return dest;
+}
diff --git a/gnulib/lib/unistr/u-strnlen.h b/gnulib/lib/unistr/u-strnlen.h
new file mode 100644
index 0000000..c042750
--- /dev/null
+++ b/gnulib/lib/unistr/u-strnlen.h
@@ -0,0 +1,26 @@
+/* Determine bounded length of UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+size_t
+FUNC (const UNIT *s, size_t maxlen)
+{
+ const UNIT *ptr;
+
+ for (ptr = s; maxlen > 0 && *ptr != 0; ptr++, maxlen--)
+ ;
+ return ptr - s;
+}
diff --git a/gnulib/lib/unistr/u-strpbrk.h b/gnulib/lib/unistr/u-strpbrk.h
new file mode 100644
index 0000000..5582b3a
--- /dev/null
+++ b/gnulib/lib/unistr/u-strpbrk.h
@@ -0,0 +1,46 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (const UNIT *str, const UNIT *accept)
+{
+ /* Optimize two cases. */
+ if (accept[0] == 0)
+ return NULL;
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, accept);
+ if (count >= 0 && accept[count] == 0)
+ return U_STRCHR (str, uc);
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+
+ for (;;)
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, ptr);
+ if (count <= 0)
+ break;
+ if (U_STRCHR (accept, uc))
+ return (UNIT *) ptr;
+ ptr += count;
+ }
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u-strspn.h b/gnulib/lib/unistr/u-strspn.h
new file mode 100644
index 0000000..c414576
--- /dev/null
+++ b/gnulib/lib/unistr/u-strspn.h
@@ -0,0 +1,54 @@
+/* Search for some characters in UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+size_t
+FUNC (const UNIT *str, const UNIT *accept)
+{
+ /* Optimize two cases. */
+ if (accept[0] == 0)
+ return 0;
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, accept);
+ if (count >= 0 && accept[count] == 0)
+ {
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr += count)
+ if (U_CMP (ptr, accept, count) != 0)
+ break;
+ return ptr - str;
+ }
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+
+ for (;;)
+ {
+ ucs4_t uc;
+ int count = U_STRMBTOUC (&uc, ptr);
+ if (count == 0)
+ return ptr - str;
+ if (count < 0)
+ break;
+ if (!U_STRCHR (accept, uc))
+ return ptr - str;
+ ptr += count;
+ }
+ return U_STRLEN (str);
+ }
+}
diff --git a/gnulib/lib/unistr/u-strstr.h b/gnulib/lib/unistr/u-strstr.h
new file mode 100644
index 0000000..336ce8b
--- /dev/null
+++ b/gnulib/lib/unistr/u-strstr.h
@@ -0,0 +1,131 @@
+/* Substring test for UTF-8/UTF-16/UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2010-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (const UNIT *haystack, const UNIT *needle)
+{
+ UNIT first = needle[0];
+
+ /* Is needle empty? */
+ if (first == 0)
+ return (UNIT *) haystack;
+
+ /* Is needle nearly empty (only one unit)? */
+ if (needle[1] == 0)
+ return U_STRCHR (haystack, first);
+
+#ifdef U_STRMBTOUC
+ /* Is needle nearly empty (only one character)? */
+ {
+ ucs4_t first_uc;
+ int count = U_STRMBTOUC (&first_uc, needle);
+ if (count > 0 && needle[count] == 0)
+ return U_STRCHR (haystack, first_uc);
+ }
+#endif
+
+#if UNIT_IS_UINT8_T
+ return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
+#else
+ {
+ /* Minimizing the worst-case complexity:
+ Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
+ The naïve algorithm is O(n*m) worst-case.
+ The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
+ memory allocation.
+ To achieve linear complexity and yet amortize the cost of the
+ memory allocation, we activate the Knuth-Morris-Pratt algorithm
+ only once the naïve algorithm has already run for some time; more
+ precisely, when
+ - the outer loop count is >= 10,
+ - the average number of comparisons per outer loop is >= 5,
+ - the total number of comparisons is >= m.
+ But we try it only once. If the memory allocation attempt failed,
+ we don't retry it. */
+ bool try_kmp = true;
+ size_t outer_loop_count = 0;
+ size_t comparison_count = 0;
+ size_t last_ccount = 0; /* last comparison count */
+ const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
+
+ /* Speed up the following searches of needle by caching its first
+ character. */
+ UNIT b = *needle++;
+
+ for (;; haystack++)
+ {
+ if (*haystack == 0)
+ /* No match. */
+ return NULL;
+
+ /* See whether it's advisable to use an asymptotically faster
+ algorithm. */
+ if (try_kmp
+ && outer_loop_count >= 10
+ && comparison_count >= 5 * outer_loop_count)
+ {
+ /* See if needle + comparison_count now reaches the end of
+ needle. */
+ if (needle_last_ccount != NULL)
+ {
+ needle_last_ccount +=
+ U_STRNLEN (needle_last_ccount,
+ comparison_count - last_ccount);
+ if (*needle_last_ccount == 0)
+ needle_last_ccount = NULL;
+ last_ccount = comparison_count;
+ }
+ if (needle_last_ccount == NULL)
+ {
+ /* Try the Knuth-Morris-Pratt algorithm. */
+ const UNIT *result;
+ bool success =
+ knuth_morris_pratt (haystack,
+ needle - 1, U_STRLEN (needle - 1),
+ &result);
+ if (success)
+ return (UNIT *) result;
+ try_kmp = false;
+ }
+ }
+
+ outer_loop_count++;
+ comparison_count++;
+ if (*haystack == b)
+ /* The first character matches. */
+ {
+ const UNIT *rhaystack = haystack + 1;
+ const UNIT *rneedle = needle;
+
+ for (;; rhaystack++, rneedle++)
+ {
+ if (*rneedle == 0)
+ /* Found a match. */
+ return (UNIT *) haystack;
+ if (*rhaystack == 0)
+ /* No match. */
+ return NULL;
+ comparison_count++;
+ if (*rhaystack != *rneedle)
+ /* Nothing in this round. */
+ break;
+ }
+ }
+ }
+ }
+#endif
+}
diff --git a/gnulib/lib/unistr/u-strtok.h b/gnulib/lib/unistr/u-strtok.h
new file mode 100644
index 0000000..763dcca
--- /dev/null
+++ b/gnulib/lib/unistr/u-strtok.h
@@ -0,0 +1,52 @@
+/* Tokenize UTF-8/UTF-16/UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+UNIT *
+FUNC (UNIT *str, const UNIT *delim, UNIT **ptr)
+{
+ if (str == NULL)
+ {
+ str = *ptr;
+ if (str == NULL)
+ return NULL; /* reminder that end of token sequence has been reached */
+ }
+
+ /* Skip leading delimiters. */
+ str += U_STRSPN (str, delim);
+
+ /* Found a token? */
+ if (*str == 0)
+ {
+ *ptr = NULL;
+ return NULL;
+ }
+
+ /* Move past the token. */
+ {
+ UNIT *token_end = U_STRPBRK (str, delim);
+ if (token_end)
+ {
+ /* NUL-terminate the token. */
+ *token_end = 0;
+ *ptr = token_end + 1;
+ }
+ else
+ *ptr = NULL;
+ }
+
+ return str;
+}
diff --git a/gnulib/lib/unistr/u16-check.c b/gnulib/lib/unistr/u16-check.c
new file mode 100644
index 0000000..79c9c49
--- /dev/null
+++ b/gnulib/lib/unistr/u16-check.c
@@ -0,0 +1,51 @@
+/* Check UTF-16 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint16_t *
+u16_check (const uint16_t *s, size_t n)
+{
+ const uint16_t *s_end = s + n;
+
+ while (s < s_end)
+ {
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ s++;
+ continue;
+ }
+ if (c < 0xdc00)
+ {
+ if (s + 2 <= s_end
+ && s[1] >= 0xdc00 && s[1] < 0xe000)
+ {
+ s += 2;
+ continue;
+ }
+ }
+ /* invalid or incomplete multibyte character */
+ return s;
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u16-chr.c b/gnulib/lib/unistr/u16-chr.c
new file mode 100644
index 0000000..dab4ce3
--- /dev/null
+++ b/gnulib/lib/unistr/u16-chr.c
@@ -0,0 +1,57 @@
+/* Search character in piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint16_t *
+u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
+{
+ uint16_t c[2];
+
+ if (uc < 0x10000)
+ {
+ uint16_t c0 = uc;
+
+ for (; n > 0; s++, n--)
+ {
+ if (*s == c0)
+ return (uint16_t *) s;
+ }
+ }
+ else
+ switch (u16_uctomb_aux (c, uc, 2))
+ {
+ case 2:
+ if (n > 1)
+ {
+ uint16_t c0 = c[0];
+ uint16_t c1 = c[1];
+
+ for (n--; n > 0; s++, n--)
+ {
+ if (*s == c0 && s[1] == c1)
+ return (uint16_t *) s;
+ }
+ }
+ break;
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u16-cmp.c b/gnulib/lib/unistr/u16-cmp.c
new file mode 100644
index 0000000..4e3e136
--- /dev/null
+++ b/gnulib/lib/unistr/u16-cmp.c
@@ -0,0 +1,54 @@
+/* Compare pieces of UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
+{
+ /* Note that the UTF-16 encoding does NOT preserve lexicographic order.
+ Namely, if uc1 is a 16-bit character and [uc2a,uc2b] is a surrogate pair,
+ we must enforce uc1 < [uc2a,uc2b], even if uc1 > uc2a. */
+ for (; n > 0;)
+ {
+ uint16_t c1 = *s1++;
+ uint16_t c2 = *s2++;
+ if (c1 == c2)
+ {
+ n--;
+ continue;
+ }
+ if (c1 < 0xd800 || c1 >= 0xe000)
+ {
+ if (!(c2 < 0xd800 || c2 >= 0xe000))
+ /* c2 is a surrogate, but c1 is not. */
+ return -1;
+ }
+ else
+ {
+ if (c2 < 0xd800 || c2 >= 0xe000)
+ /* c1 is a surrogate, but c2 is not. */
+ return 1;
+ }
+ return (int)c1 - (int)c2;
+ /* > 0 if c1 > c2, < 0 if c1 < c2. */
+ }
+ return 0;
+}
diff --git a/gnulib/lib/unistr/u16-cmp2.c b/gnulib/lib/unistr/u16-cmp2.c
new file mode 100644
index 0000000..f990c5c
--- /dev/null
+++ b/gnulib/lib/unistr/u16-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-16 strings.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+#define FUNC u16_cmp2
+#define UNIT uint16_t
+#define U_CMP u16_cmp
+#include "u-cmp2.h"
diff --git a/gnulib/lib/unistr/u16-cpy-alloc.c b/gnulib/lib/unistr/u16-cpy-alloc.c
new file mode 100644
index 0000000..930fffc
--- /dev/null
+++ b/gnulib/lib/unistr/u16-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_cpy_alloc
+#define UNIT uint16_t
+#include "u-cpy-alloc.h"
diff --git a/gnulib/lib/unistr/u16-cpy.c b/gnulib/lib/unistr/u16-cpy.c
new file mode 100644
index 0000000..dff750e
--- /dev/null
+++ b/gnulib/lib/unistr/u16-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_cpy
+#define UNIT uint16_t
+#include "u-cpy.h"
diff --git a/gnulib/lib/unistr/u16-endswith.c b/gnulib/lib/unistr/u16-endswith.c
new file mode 100644
index 0000000..9e0c019
--- /dev/null
+++ b/gnulib/lib/unistr/u16-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_endswith
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_CMP u16_cmp
+#include "u-endswith.h"
diff --git a/gnulib/lib/unistr/u16-mblen.c b/gnulib/lib/unistr/u16-mblen.c
new file mode 100644
index 0000000..60f7a69
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mblen.c
@@ -0,0 +1,50 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_mblen (const uint16_t *s, size_t n)
+{
+ if (n > 0)
+ {
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ return (c != 0 ? 1 : 0);
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+ {
+ if (n >= 2
+ && s[1] >= 0xdc00 && s[1] < 0xe000)
+ return 2;
+ }
+#else
+ {
+ if (n >= 2)
+ return 2;
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u16-mbsnlen.c b/gnulib/lib/unistr/u16-mbsnlen.c
new file mode 100644
index 0000000..fb0df36
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbsnlen.c
@@ -0,0 +1,42 @@
+/* Count characters in UTF-16 string.
+ Copyright (C) 2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+size_t
+u16_mbsnlen (const uint16_t *s, size_t n)
+{
+ size_t characters;
+
+ characters = 0;
+ while (n > 0)
+ {
+ ucs4_t uc;
+ int count = u16_mbtoucr (&uc, s, n);
+ characters++;
+ if (count == -2)
+ break;
+ if (count <= 0)
+ count = 1;
+ s += count;
+ n -= count;
+ }
+ return characters;
+}
diff --git a/gnulib/lib/unistr/u16-mbtouc-aux.c b/gnulib/lib/unistr/u16-mbtouc-aux.c
new file mode 100644
index 0000000..e8e9c92
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbtouc-aux.c
@@ -0,0 +1,51 @@
+/* Conversion UTF-16 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+int
+u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xdc00)
+ {
+ if (n >= 2)
+ {
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u16-mbtouc-unsafe-aux.c b/gnulib/lib/unistr/u16-mbtouc-unsafe-aux.c
new file mode 100644
index 0000000..b0abe51
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbtouc-unsafe-aux.c
@@ -0,0 +1,55 @@
+/* Conversion UTF-16 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+int
+u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+#endif
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u16-mbtouc-unsafe.c b/gnulib/lib/unistr/u16-mbtouc-unsafe.c
new file mode 100644
index 0000000..b393397
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbtouc-unsafe.c
@@ -0,0 +1,66 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+#endif
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u16-mbtouc.c b/gnulib/lib/unistr/u16-mbtouc.c
new file mode 100644
index 0000000..9941e9b
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbtouc.c
@@ -0,0 +1,61 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+ if (c < 0xdc00)
+ {
+ if (n >= 2)
+ {
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u16-mbtoucr.c b/gnulib/lib/unistr/u16-mbtoucr.c
new file mode 100644
index 0000000..53014a3
--- /dev/null
+++ b/gnulib/lib/unistr/u16-mbtoucr.c
@@ -0,0 +1,54 @@
+/* Look at first character in UTF-16 string, returning an error code.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n)
+{
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return 1;
+ }
+ if (c < 0xdc00)
+ {
+ if (n >= 2)
+ {
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u16-move.c b/gnulib/lib/unistr/u16-move.c
new file mode 100644
index 0000000..03c6486
--- /dev/null
+++ b/gnulib/lib/unistr/u16-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_move
+#define UNIT uint16_t
+#include "u-move.h"
diff --git a/gnulib/lib/unistr/u16-next.c b/gnulib/lib/unistr/u16-next.c
new file mode 100644
index 0000000..c948710
--- /dev/null
+++ b/gnulib/lib/unistr/u16-next.c
@@ -0,0 +1,37 @@
+/* Iterate over next character in UTF-16 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint16_t *
+u16_next (ucs4_t *puc, const uint16_t *s)
+{
+ int count;
+
+ count = u16_strmbtouc (puc, s);
+ if (count > 0)
+ return s + count;
+ else
+ {
+ if (count < 0)
+ *puc = 0xfffd;
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u16-prev.c b/gnulib/lib/unistr/u16-prev.c
new file mode 100644
index 0000000..40304d9
--- /dev/null
+++ b/gnulib/lib/unistr/u16-prev.c
@@ -0,0 +1,53 @@
+/* Iterate over previous character in UTF-16 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint16_t *
+u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start)
+{
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ if (s != start)
+ {
+ uint16_t c_1 = s[-1];
+
+ if (c_1 < 0xd800 || c_1 >= 0xe000)
+ {
+ *puc = c_1;
+ return s - 1;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if (c_1 >= 0xdc00)
+#endif
+ if (s - 1 != start)
+ {
+ uint16_t c_2 = s[-2];
+
+#if CONFIG_UNICODE_SAFETY
+ if (c_2 >= 0xd800 && c_2 < 0xdc00)
+#endif
+ {
+ *puc = 0x10000 + ((c_2 - 0xd800) << 10) + (c_1 - 0xdc00);
+ return s - 2;
+ }
+ }
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u16-set.c b/gnulib/lib/unistr/u16-set.c
new file mode 100644
index 0000000..22b9370
--- /dev/null
+++ b/gnulib/lib/unistr/u16-set.c
@@ -0,0 +1,26 @@
+/* Fill UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_set
+#define UNIT uint16_t
+#define IS_SINGLE_UNIT(uc) (uc < 0xd800 || (uc < 0x10000 && uc >= 0xe000))
+#include "u-set.h"
diff --git a/gnulib/lib/unistr/u16-startswith.c b/gnulib/lib/unistr/u16-startswith.c
new file mode 100644
index 0000000..a7eff4f
--- /dev/null
+++ b/gnulib/lib/unistr/u16-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_startswith
+#define UNIT uint16_t
+#include "u-startswith.h"
diff --git a/gnulib/lib/unistr/u16-stpcpy.c b/gnulib/lib/unistr/u16-stpcpy.c
new file mode 100644
index 0000000..8085a43
--- /dev/null
+++ b/gnulib/lib/unistr/u16-stpcpy.c
@@ -0,0 +1,26 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_stpcpy
+#define UNIT uint16_t
+#include "u-stpcpy.h"
diff --git a/gnulib/lib/unistr/u16-stpncpy.c b/gnulib/lib/unistr/u16-stpncpy.c
new file mode 100644
index 0000000..825326d
--- /dev/null
+++ b/gnulib/lib/unistr/u16-stpncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_stpncpy
+#define UNIT uint16_t
+#include "u-stpncpy.h"
diff --git a/gnulib/lib/unistr/u16-strcat.c b/gnulib/lib/unistr/u16-strcat.c
new file mode 100644
index 0000000..c7819bb
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strcat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcat
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strcat.h"
diff --git a/gnulib/lib/unistr/u16-strchr.c b/gnulib/lib/unistr/u16-strchr.c
new file mode 100644
index 0000000..028a94d
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strchr.c
@@ -0,0 +1,64 @@
+/* Search character in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint16_t *
+u16_strchr (const uint16_t *s, ucs4_t uc)
+{
+ uint16_t c[2];
+
+ if (uc < 0x10000)
+ {
+ uint16_t c0 = uc;
+
+ for (;; s++)
+ {
+ if (*s == c0)
+ break;
+ if (*s == 0)
+ goto notfound;
+ }
+ return (uint16_t *) s;
+ }
+ else
+ switch (u16_uctomb_aux (c, uc, 2))
+ {
+ case 2:
+ if (*s == 0)
+ goto notfound;
+ {
+ uint16_t c0 = c[0];
+ uint16_t c1 = c[1];
+
+ for (;; s++)
+ {
+ if (s[1] == 0)
+ goto notfound;
+ if (*s == c0 && s[1] == c1)
+ break;
+ }
+ return (uint16_t *) s;
+ }
+ }
+notfound:
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u16-strcmp.c b/gnulib/lib/unistr/u16-strcmp.c
new file mode 100644
index 0000000..df27535
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strcmp.c
@@ -0,0 +1,50 @@
+/* Compare UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_strcmp (const uint16_t *s1, const uint16_t *s2)
+{
+ /* Note that the UTF-16 encoding does NOT preserve lexicographic order.
+ Namely, if uc1 is a 16-bit character and [uc2a,uc2b] is a surrogate pair,
+ we must enforce uc1 < [uc2a,uc2b], even if uc1 > uc2a. */
+ for (;;)
+ {
+ uint16_t c1 = *s1++;
+ uint16_t c2 = *s2++;
+ if (c1 != 0 && c1 == c2)
+ continue;
+ if (c1 < 0xd800 || c1 >= 0xe000)
+ {
+ if (!(c2 < 0xd800 || c2 >= 0xe000))
+ /* c2 is a surrogate, but c1 is not. */
+ return -1;
+ }
+ else
+ {
+ if (c2 < 0xd800 || c2 >= 0xe000)
+ /* c1 is a surrogate, but c2 is not. */
+ return 1;
+ }
+ return (int)c1 - (int)c2;
+ /* > 0 if c1 > c2, < 0 if c1 < c2. */
+ }
+}
diff --git a/gnulib/lib/unistr/u16-strcoll.c b/gnulib/lib/unistr/u16-strcoll.c
new file mode 100644
index 0000000..1867e88
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-16 strings using the collation rules of the current locale.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+
+#define FUNC u16_strcoll
+#define UNIT uint16_t
+#define U_STRCMP u16_strcmp
+#define U_STRCONV_TO_ENCODING u16_strconv_to_encoding
+#include "u-strcoll.h"
diff --git a/gnulib/lib/unistr/u16-strcpy.c b/gnulib/lib/unistr/u16-strcpy.c
new file mode 100644
index 0000000..bc0fb9c
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcpy
+#define UNIT uint16_t
+#include "u-strcpy.h"
diff --git a/gnulib/lib/unistr/u16-strcspn.c b/gnulib/lib/unistr/u16-strcspn.c
new file mode 100644
index 0000000..7a29f21
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strcspn.c
@@ -0,0 +1,28 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strcspn
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRCHR u16_strchr
+#include "u-strcspn.h"
diff --git a/gnulib/lib/unistr/u16-strdup.c b/gnulib/lib/unistr/u16-strdup.c
new file mode 100644
index 0000000..22c4c18
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strdup.c
@@ -0,0 +1,26 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strdup
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strdup.h"
diff --git a/gnulib/lib/unistr/u16-strlen.c b/gnulib/lib/unistr/u16-strlen.c
new file mode 100644
index 0000000..6e4f52c
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strlen.c
@@ -0,0 +1,25 @@
+/* Determine length of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strlen
+#define UNIT uint16_t
+#include "u-strlen.h"
diff --git a/gnulib/lib/unistr/u16-strmblen.c b/gnulib/lib/unistr/u16-strmblen.c
new file mode 100644
index 0000000..4e997b5
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strmblen.c
@@ -0,0 +1,44 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_strmblen (const uint16_t *s)
+{
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ return (c != 0 ? 1 : 0);
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+ {
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+ return 2;
+ }
+#else
+ if (s[1] != 0)
+ return 2;
+#endif
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u16-strmbtouc.c b/gnulib/lib/unistr/u16-strmbtouc.c
new file mode 100644
index 0000000..f4eeb49
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strmbtouc.c
@@ -0,0 +1,51 @@
+/* Look at first character in UTF-16 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_strmbtouc (ucs4_t *puc, const uint16_t *s)
+{
+ /* Keep in sync with unistr.h and utf16-ucs4.c. */
+ uint16_t c = *s;
+
+ if (c < 0xd800 || c >= 0xe000)
+ {
+ *puc = c;
+ return (c != 0 ? 1 : 0);
+ }
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xdc00)
+#endif
+ {
+#if CONFIG_UNICODE_SAFETY
+ if (s[1] >= 0xdc00 && s[1] < 0xe000)
+#else
+ if (s[1] != 0)
+#endif
+ {
+ *puc = 0x10000 + ((c - 0xd800) << 10) + (s[1] - 0xdc00);
+ return 2;
+ }
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u16-strncat.c b/gnulib/lib/unistr/u16-strncat.c
new file mode 100644
index 0000000..601cd8d
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strncat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strncat
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#include "u-strncat.h"
diff --git a/gnulib/lib/unistr/u16-strncmp.c b/gnulib/lib/unistr/u16-strncmp.c
new file mode 100644
index 0000000..55863b1
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strncmp.c
@@ -0,0 +1,54 @@
+/* Compare UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
+{
+ /* Note that the UTF-16 encoding does NOT preserve lexicographic order.
+ Namely, if uc1 is a 16-bit character and [uc2a,uc2b] is a surrogate pair,
+ we must enforce uc1 < [uc2a,uc2b], even if uc1 > uc2a. */
+ for (; n > 0;)
+ {
+ uint16_t c1 = *s1++;
+ uint16_t c2 = *s2++;
+ if (c1 != 0 && c1 == c2)
+ {
+ n--;
+ continue;
+ }
+ if (c1 < 0xd800 || c1 >= 0xe000)
+ {
+ if (!(c2 < 0xd800 || c2 >= 0xe000))
+ /* c2 is a surrogate, but c1 is not. */
+ return -1;
+ }
+ else
+ {
+ if (c2 < 0xd800 || c2 >= 0xe000)
+ /* c1 is a surrogate, but c2 is not. */
+ return 1;
+ }
+ return (int)c1 - (int)c2;
+ /* > 0 if c1 > c2, < 0 if c1 < c2, = 0 if c1 and c2 are both 0. */
+ }
+ return 0;
+}
diff --git a/gnulib/lib/unistr/u16-strncpy.c b/gnulib/lib/unistr/u16-strncpy.c
new file mode 100644
index 0000000..6f0d638
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strncpy
+#define UNIT uint16_t
+#include "u-strncpy.h"
diff --git a/gnulib/lib/unistr/u16-strnlen.c b/gnulib/lib/unistr/u16-strnlen.c
new file mode 100644
index 0000000..8c2a80e
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strnlen.c
@@ -0,0 +1,25 @@
+/* Determine bounded length of UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strnlen
+#define UNIT uint16_t
+#include "u-strnlen.h"
diff --git a/gnulib/lib/unistr/u16-strpbrk.c b/gnulib/lib/unistr/u16-strpbrk.c
new file mode 100644
index 0000000..150ceea
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strpbrk.c
@@ -0,0 +1,27 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strpbrk
+#define UNIT uint16_t
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRCHR u16_strchr
+#include "u-strpbrk.h"
diff --git a/gnulib/lib/unistr/u16-strrchr.c b/gnulib/lib/unistr/u16-strrchr.c
new file mode 100644
index 0000000..1ee2d7f
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strrchr.c
@@ -0,0 +1,65 @@
+/* Search character in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint16_t *
+u16_strrchr (const uint16_t *s, ucs4_t uc)
+{
+ /* Calling u16_strlen and then searching from the other end would cause more
+ memory accesses. Avoid that, at the cost of a few more comparisons. */
+ uint16_t *result = NULL;
+ uint16_t c[2];
+
+ if (uc < 0x10000)
+ {
+ uint16_t c0 = uc;
+
+ for (;; s++)
+ {
+ if (*s == c0)
+ result = (uint16_t *) s;
+ if (*s == 0)
+ break;
+ }
+ }
+ else
+ switch (u16_uctomb_aux (c, uc, 2))
+ {
+ case 2:
+ if (*s)
+ {
+ uint16_t c0 = c[0];
+ uint16_t c1 = c[1];
+
+ /* FIXME: Maybe walking the string via u16_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[1] == 0)
+ break;
+ if (*s == c0 && s[1] == c1)
+ result = (uint16_t *) s;
+ }
+ }
+ break;
+ }
+ return result;
+}
diff --git a/gnulib/lib/unistr/u16-strspn.c b/gnulib/lib/unistr/u16-strspn.c
new file mode 100644
index 0000000..c39bb71
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strspn.c
@@ -0,0 +1,29 @@
+/* Search for some characters in UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strspn
+#define UNIT uint16_t
+#define U_STRLEN u16_strlen
+#define U_STRMBTOUC u16_strmbtouc
+#define U_CMP u16_cmp
+#define U_STRCHR u16_strchr
+#include "u-strspn.h"
diff --git a/gnulib/lib/unistr/u16-strstr.c b/gnulib/lib/unistr/u16-strstr.c
new file mode 100644
index 0000000..934a4fc
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strstr.c
@@ -0,0 +1,37 @@
+/* Substring test for UTF-16 strings.
+ Copyright (C) 1999, 2002, 2006, 2010-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "malloca.h"
+
+/* FIXME: Maybe walking the string via u16_mblen is a win? */
+
+#define UNIT uint16_t
+
+#define CANON_ELEMENT(c) c
+#include "str-kmp.h"
+
+#define FUNC u16_strstr
+#define U_STRCHR u16_strchr
+#define U_STRMBTOUC u16_strmbtouc
+#define U_STRLEN u16_strlen
+#define U_STRNLEN u16_strnlen
+#include "u-strstr.h"
diff --git a/gnulib/lib/unistr/u16-strtok.c b/gnulib/lib/unistr/u16-strtok.c
new file mode 100644
index 0000000..50f30a3
--- /dev/null
+++ b/gnulib/lib/unistr/u16-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-16 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_strtok
+#define UNIT uint16_t
+#define U_STRSPN u16_strspn
+#define U_STRPBRK u16_strpbrk
+#include "u-strtok.h"
diff --git a/gnulib/lib/unistr/u16-to-u32.c b/gnulib/lib/unistr/u16-to-u32.c
new file mode 100644
index 0000000..bcf2e63
--- /dev/null
+++ b/gnulib/lib/unistr/u16-to-u32.c
@@ -0,0 +1,125 @@
+/* Convert UTF-16 string to UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_to_u32
+#define SRC_UNIT uint16_t
+#define DST_UNIT uint32_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u16_mbtoucr (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ if (length + 1 > allocated)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 1 > allocated)
+ allocated = length + 1;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ }
+ result[length++] = uc;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u16-to-u8.c b/gnulib/lib/unistr/u16-to-u8.c
new file mode 100644
index 0000000..02eee97
--- /dev/null
+++ b/gnulib/lib/unistr/u16-to-u8.c
@@ -0,0 +1,136 @@
+/* Convert UTF-16 string to UTF-8 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u16_to_u8
+#define SRC_UNIT uint16_t
+#define DST_UNIT uint8_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u16_mbtoucr (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ count = u8_uctomb (result + length, uc, allocated - length);
+ if (count == -1)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ if (count == -2)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 6 > allocated)
+ allocated = length + 6;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ count = u8_uctomb (result + length, uc, allocated - length);
+ if (count < 0)
+ abort ();
+ }
+ length += count;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u16-uctomb-aux.c b/gnulib/lib/unistr/u16-uctomb-aux.c
new file mode 100644
index 0000000..9ec1e7b
--- /dev/null
+++ b/gnulib/lib/unistr/u16-uctomb-aux.c
@@ -0,0 +1,58 @@
+/* Conversion UCS-4 to UTF-16.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0xd800)
+ {
+ /* The case n >= 1 is already handled by the caller. */
+ }
+ else if (uc < 0x10000)
+ {
+ if (uc >= 0xe000)
+ {
+ if (n >= 1)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ }
+ else
+ return -1;
+ }
+ else
+ {
+ if (uc < 0x110000)
+ {
+ if (n >= 2)
+ {
+ s[0] = 0xd800 + ((uc - 0x10000) >> 10);
+ s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
+ return 2;
+ }
+ }
+ else
+ return -1;
+ }
+ return -2;
+}
diff --git a/gnulib/lib/unistr/u16-uctomb.c b/gnulib/lib/unistr/u16-uctomb.c
new file mode 100644
index 0000000..1bc66fd
--- /dev/null
+++ b/gnulib/lib/unistr/u16-uctomb.c
@@ -0,0 +1,72 @@
+/* Store a character in UTF-16 string.
+ Copyright (C) 2002, 2005-2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u16_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u16_uctomb (uint16_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0xd800)
+ {
+ if (n > 0)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ /* else return -2, below. */
+ }
+ else if (uc < 0x10000)
+ {
+ if (uc >= 0xe000)
+ {
+ if (n >= 1)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ }
+ else
+ return -1;
+ }
+ else
+ {
+ if (uc < 0x110000)
+ {
+ if (n >= 2)
+ {
+ s[0] = 0xd800 + ((uc - 0x10000) >> 10);
+ s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
+ return 2;
+ }
+ }
+ else
+ return -1;
+ }
+ return -2;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u32-check.c b/gnulib/lib/unistr/u32-check.c
new file mode 100644
index 0000000..9476a1c
--- /dev/null
+++ b/gnulib/lib/unistr/u32-check.c
@@ -0,0 +1,39 @@
+/* Check UTF-32 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint32_t *
+u32_check (const uint32_t *s, size_t n)
+{
+ const uint32_t *s_end = s + n;
+
+ while (s < s_end)
+ {
+ uint32_t c = *s;
+
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+ s++;
+ else
+ /* invalid Unicode character */
+ return s;
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u32-chr.c b/gnulib/lib/unistr/u32-chr.c
new file mode 100644
index 0000000..5dba257
--- /dev/null
+++ b/gnulib/lib/unistr/u32-chr.c
@@ -0,0 +1,32 @@
+/* Search character in piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint32_t *
+u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
+{
+ for (; n > 0; s++, n--)
+ {
+ if (*s == uc)
+ return (uint32_t *) s;
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u32-cmp.c b/gnulib/lib/unistr/u32-cmp.c
new file mode 100644
index 0000000..73c6dc9
--- /dev/null
+++ b/gnulib/lib/unistr/u32-cmp.c
@@ -0,0 +1,40 @@
+/* Compare pieces of UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
+{
+ for (; n > 0;)
+ {
+ uint32_t uc1 = *s1++;
+ uint32_t uc2 = *s2++;
+ if (uc1 == uc2)
+ {
+ n--;
+ continue;
+ }
+ /* Note that uc1 and uc2 each have at most 31 bits. */
+ return (int)uc1 - (int)uc2;
+ /* > 0 if uc1 > uc2, < 0 if uc1 < uc2. */
+ }
+ return 0;
+}
diff --git a/gnulib/lib/unistr/u32-cmp2.c b/gnulib/lib/unistr/u32-cmp2.c
new file mode 100644
index 0000000..cfe00a6
--- /dev/null
+++ b/gnulib/lib/unistr/u32-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-32 strings.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+#define FUNC u32_cmp2
+#define UNIT uint32_t
+#define U_CMP u32_cmp
+#include "u-cmp2.h"
diff --git a/gnulib/lib/unistr/u32-cpy-alloc.c b/gnulib/lib/unistr/u32-cpy-alloc.c
new file mode 100644
index 0000000..892d235
--- /dev/null
+++ b/gnulib/lib/unistr/u32-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_cpy_alloc
+#define UNIT uint32_t
+#include "u-cpy-alloc.h"
diff --git a/gnulib/lib/unistr/u32-cpy.c b/gnulib/lib/unistr/u32-cpy.c
new file mode 100644
index 0000000..5d8b01a
--- /dev/null
+++ b/gnulib/lib/unistr/u32-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_cpy
+#define UNIT uint32_t
+#include "u-cpy.h"
diff --git a/gnulib/lib/unistr/u32-endswith.c b/gnulib/lib/unistr/u32-endswith.c
new file mode 100644
index 0000000..bb7a2ce
--- /dev/null
+++ b/gnulib/lib/unistr/u32-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_endswith
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#define U_CMP u32_cmp
+#include "u-endswith.h"
diff --git a/gnulib/lib/unistr/u32-mblen.c b/gnulib/lib/unistr/u32-mblen.c
new file mode 100644
index 0000000..b4ae146
--- /dev/null
+++ b/gnulib/lib/unistr/u32-mblen.c
@@ -0,0 +1,37 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_mblen (const uint32_t *s, size_t n)
+{
+ if (n > 0)
+ {
+ uint32_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+#endif
+ return (c != 0 ? 1 : 0);
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u32-mbsnlen.c b/gnulib/lib/unistr/u32-mbsnlen.c
new file mode 100644
index 0000000..d9ca6ea
--- /dev/null
+++ b/gnulib/lib/unistr/u32-mbsnlen.c
@@ -0,0 +1,27 @@
+/* Count characters in UTF-32 string.
+ Copyright (C) 2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+size_t
+u32_mbsnlen (const uint32_t *s, size_t n)
+{
+ return n;
+}
diff --git a/gnulib/lib/unistr/u32-mbtouc-unsafe.c b/gnulib/lib/unistr/u32-mbtouc-unsafe.c
new file mode 100644
index 0000000..6130fce
--- /dev/null
+++ b/gnulib/lib/unistr/u32-mbtouc-unsafe.c
@@ -0,0 +1,48 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+ uint32_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+#endif
+ *puc = c;
+#if CONFIG_UNICODE_SAFETY
+ else
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+#endif
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u32-mbtouc.c b/gnulib/lib/unistr/u32-mbtouc.c
new file mode 100644
index 0000000..2146b47
--- /dev/null
+++ b/gnulib/lib/unistr/u32-mbtouc.c
@@ -0,0 +1,43 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+ uint32_t c = *s;
+
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+ *puc = c;
+ else
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u32-mbtoucr.c b/gnulib/lib/unistr/u32-mbtoucr.c
new file mode 100644
index 0000000..80f5194
--- /dev/null
+++ b/gnulib/lib/unistr/u32-mbtoucr.c
@@ -0,0 +1,39 @@
+/* Look at first character in UTF-32 string, returning an error code.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n)
+{
+ uint32_t c = *s;
+
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+ {
+ *puc = c;
+ return 1;
+ }
+ else
+ {
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return -1;
+ }
+}
diff --git a/gnulib/lib/unistr/u32-move.c b/gnulib/lib/unistr/u32-move.c
new file mode 100644
index 0000000..c3f1161
--- /dev/null
+++ b/gnulib/lib/unistr/u32-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_move
+#define UNIT uint32_t
+#include "u-move.h"
diff --git a/gnulib/lib/unistr/u32-next.c b/gnulib/lib/unistr/u32-next.c
new file mode 100644
index 0000000..d087630
--- /dev/null
+++ b/gnulib/lib/unistr/u32-next.c
@@ -0,0 +1,39 @@
+/* Iterate over next character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint32_t *
+u32_next (ucs4_t *puc, const uint32_t *s)
+{
+ int count;
+
+ count = u32_strmbtouc (puc, s);
+ if (count > 0)
+ return s + count;
+ else
+ {
+#if CONFIG_UNICODE_SAFETY
+ if (count < 0)
+ *puc = 0xfffd;
+#endif
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u32-prev.c b/gnulib/lib/unistr/u32-prev.c
new file mode 100644
index 0000000..c77f50c
--- /dev/null
+++ b/gnulib/lib/unistr/u32-prev.c
@@ -0,0 +1,39 @@
+/* Iterate over previous character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint32_t *
+u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start)
+{
+ if (s != start)
+ {
+ uint32_t c_1 = s[-1];
+
+#if CONFIG_UNICODE_SAFETY
+ if (c_1 < 0xd800 || (c_1 >= 0xe000 && c_1 < 0x110000))
+#endif
+ {
+ *puc = c_1;
+ return s - 1;
+ }
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u32-set.c b/gnulib/lib/unistr/u32-set.c
new file mode 100644
index 0000000..21cda1a
--- /dev/null
+++ b/gnulib/lib/unistr/u32-set.c
@@ -0,0 +1,26 @@
+/* Fill UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_set
+#define UNIT uint32_t
+#define IS_SINGLE_UNIT(uc) (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
+#include "u-set.h"
diff --git a/gnulib/lib/unistr/u32-startswith.c b/gnulib/lib/unistr/u32-startswith.c
new file mode 100644
index 0000000..b884ff6
--- /dev/null
+++ b/gnulib/lib/unistr/u32-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_startswith
+#define UNIT uint32_t
+#include "u-startswith.h"
diff --git a/gnulib/lib/unistr/u32-stpcpy.c b/gnulib/lib/unistr/u32-stpcpy.c
new file mode 100644
index 0000000..134a574
--- /dev/null
+++ b/gnulib/lib/unistr/u32-stpcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_stpcpy
+#define UNIT uint32_t
+#include "u-stpcpy.h"
diff --git a/gnulib/lib/unistr/u32-stpncpy.c b/gnulib/lib/unistr/u32-stpncpy.c
new file mode 100644
index 0000000..8b3fe52
--- /dev/null
+++ b/gnulib/lib/unistr/u32-stpncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_stpncpy
+#define UNIT uint32_t
+#include "u-stpncpy.h"
diff --git a/gnulib/lib/unistr/u32-strcat.c b/gnulib/lib/unistr/u32-strcat.c
new file mode 100644
index 0000000..8ca8feb
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strcat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcat
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strcat.h"
diff --git a/gnulib/lib/unistr/u32-strchr.c b/gnulib/lib/unistr/u32-strchr.c
new file mode 100644
index 0000000..baa661a
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strchr.c
@@ -0,0 +1,36 @@
+/* Search character in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint32_t *
+u32_strchr (const uint32_t *s, ucs4_t uc)
+{
+ for (;; s++)
+ {
+ if (*s == uc)
+ break;
+ if (*s == 0)
+ goto notfound;
+ }
+ return (uint32_t *) s;
+notfound:
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u32-strcmp.c b/gnulib/lib/unistr/u32-strcmp.c
new file mode 100644
index 0000000..70bfc1a
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strcmp.c
@@ -0,0 +1,36 @@
+/* Compare UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strcmp (const uint32_t *s1, const uint32_t *s2)
+{
+ for (;;)
+ {
+ uint32_t uc1 = *s1++;
+ uint32_t uc2 = *s2++;
+ if (uc1 != 0 && uc1 == uc2)
+ continue;
+ /* Note that uc1 and uc2 each have at most 31 bits. */
+ return (int)uc1 - (int)uc2;
+ /* > 0 if uc1 > uc2, < 0 if uc1 < uc2. */
+ }
+}
diff --git a/gnulib/lib/unistr/u32-strcoll.c b/gnulib/lib/unistr/u32-strcoll.c
new file mode 100644
index 0000000..52cee48
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-32 strings using the collation rules of the current locale.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+
+#define FUNC u32_strcoll
+#define UNIT uint32_t
+#define U_STRCMP u32_strcmp
+#define U_STRCONV_TO_ENCODING u32_strconv_to_encoding
+#include "u-strcoll.h"
diff --git a/gnulib/lib/unistr/u32-strcpy.c b/gnulib/lib/unistr/u32-strcpy.c
new file mode 100644
index 0000000..c2801ec
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strcpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcpy
+#define UNIT uint32_t
+#include "u-strcpy.h"
diff --git a/gnulib/lib/unistr/u32-strcspn.c b/gnulib/lib/unistr/u32-strcspn.c
new file mode 100644
index 0000000..b53a417
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strcspn.c
@@ -0,0 +1,51 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strcspn
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#define U_STRCHR u32_strchr
+
+size_t
+FUNC (const UNIT *str, const UNIT *reject)
+{
+ /* Optimize two cases. */
+ if (reject[0] == 0)
+ return U_STRLEN (str);
+ if (reject[1] == 0)
+ {
+ ucs4_t uc = reject[0];
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (*ptr == uc)
+ break;
+ return ptr - str;
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (U_STRCHR (reject, *ptr))
+ break;
+ return ptr - str;
+ }
+}
diff --git a/gnulib/lib/unistr/u32-strdup.c b/gnulib/lib/unistr/u32-strdup.c
new file mode 100644
index 0000000..e8853d9
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strdup.c
@@ -0,0 +1,26 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strdup
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strdup.h"
diff --git a/gnulib/lib/unistr/u32-strlen.c b/gnulib/lib/unistr/u32-strlen.c
new file mode 100644
index 0000000..e173a1b
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strlen.c
@@ -0,0 +1,25 @@
+/* Determine length of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strlen
+#define UNIT uint32_t
+#include "u-strlen.h"
diff --git a/gnulib/lib/unistr/u32-strmblen.c b/gnulib/lib/unistr/u32-strmblen.c
new file mode 100644
index 0000000..722c434
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strmblen.c
@@ -0,0 +1,36 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strmblen (const uint32_t *s)
+{
+ uint32_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+#endif
+ return (c != 0 ? 1 : 0);
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ return -1;
+#endif
+}
diff --git a/gnulib/lib/unistr/u32-strmbtouc.c b/gnulib/lib/unistr/u32-strmbtouc.c
new file mode 100644
index 0000000..7f02599
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strmbtouc.c
@@ -0,0 +1,39 @@
+/* Look at first character in UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strmbtouc (ucs4_t *puc, const uint32_t *s)
+{
+ uint32_t c = *s;
+
+#if CONFIG_UNICODE_SAFETY
+ if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
+#endif
+ {
+ *puc = c;
+ return (c != 0 ? 1 : 0);
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ return -1;
+#endif
+}
diff --git a/gnulib/lib/unistr/u32-strncat.c b/gnulib/lib/unistr/u32-strncat.c
new file mode 100644
index 0000000..7f1f277
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strncat.c
@@ -0,0 +1,26 @@
+/* Concatenate UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strncat
+#define UNIT uint32_t
+#define U_STRLEN u32_strlen
+#include "u-strncat.h"
diff --git a/gnulib/lib/unistr/u32-strncmp.c b/gnulib/lib/unistr/u32-strncmp.c
new file mode 100644
index 0000000..27a1113
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strncmp.c
@@ -0,0 +1,40 @@
+/* Compare UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
+{
+ for (; n > 0;)
+ {
+ uint32_t uc1 = *s1++;
+ uint32_t uc2 = *s2++;
+ if (uc1 != 0 && uc1 == uc2)
+ {
+ n--;
+ continue;
+ }
+ /* Note that uc1 and uc2 each have at most 31 bits. */
+ return (int)uc1 - (int)uc2;
+ /* > 0 if uc1 > uc2, < 0 if uc1 < uc2, = 0 if uc1 and uc2 are both 0. */
+ }
+ return 0;
+}
diff --git a/gnulib/lib/unistr/u32-strncpy.c b/gnulib/lib/unistr/u32-strncpy.c
new file mode 100644
index 0000000..aa43115
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strncpy.c
@@ -0,0 +1,25 @@
+/* Copy UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strncpy
+#define UNIT uint32_t
+#include "u-strncpy.h"
diff --git a/gnulib/lib/unistr/u32-strnlen.c b/gnulib/lib/unistr/u32-strnlen.c
new file mode 100644
index 0000000..c2da0ed
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strnlen.c
@@ -0,0 +1,25 @@
+/* Determine bounded length of UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strnlen
+#define UNIT uint32_t
+#include "u-strnlen.h"
diff --git a/gnulib/lib/unistr/u32-strpbrk.c b/gnulib/lib/unistr/u32-strpbrk.c
new file mode 100644
index 0000000..df7571d
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strpbrk.c
@@ -0,0 +1,50 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strpbrk
+#define UNIT uint32_t
+#define U_STRCHR u32_strchr
+
+UNIT *
+FUNC (const UNIT *str, const UNIT *accept)
+{
+ /* Optimize two cases. */
+ if (accept[0] == 0)
+ return NULL;
+ if (accept[1] == 0)
+ {
+ ucs4_t uc = accept[0];
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (*ptr == uc)
+ return (UNIT *) ptr;
+ return NULL;
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (U_STRCHR (accept, *ptr))
+ return (UNIT *) ptr;
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u32-strrchr.c b/gnulib/lib/unistr/u32-strrchr.c
new file mode 100644
index 0000000..436e351
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strrchr.c
@@ -0,0 +1,38 @@
+/* Search character in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint32_t *
+u32_strrchr (const uint32_t *s, ucs4_t uc)
+{
+ /* Calling u32_strlen and then searching from the other end would cause more
+ memory accesses. Avoid that, at the cost of a few more comparisons. */
+ uint32_t *result = NULL;
+
+ for (;; s++)
+ {
+ if (*s == uc)
+ result = (uint32_t *) s;
+ if (*s == 0)
+ break;
+ }
+ return result;
+}
diff --git a/gnulib/lib/unistr/u32-strspn.c b/gnulib/lib/unistr/u32-strspn.c
new file mode 100644
index 0000000..56f5456
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strspn.c
@@ -0,0 +1,50 @@
+/* Search for some characters in UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strspn
+#define UNIT uint32_t
+#define U_STRCHR u32_strchr
+
+size_t
+FUNC (const UNIT *str, const UNIT *accept)
+{
+ /* Optimize two cases. */
+ if (accept[0] == 0)
+ return 0;
+ if (accept[1] == 0)
+ {
+ ucs4_t uc = accept[0];
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (*ptr != uc)
+ break;
+ return ptr - str;
+ }
+ /* General case. */
+ {
+ const UNIT *ptr = str;
+ for (; *ptr != 0; ptr++)
+ if (!U_STRCHR (accept, *ptr))
+ break;
+ return ptr - str;
+ }
+}
diff --git a/gnulib/lib/unistr/u32-strstr.c b/gnulib/lib/unistr/u32-strstr.c
new file mode 100644
index 0000000..00a7f3e
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strstr.c
@@ -0,0 +1,34 @@
+/* Substring test for UTF-32 strings.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "malloca.h"
+
+#define UNIT uint32_t
+
+#define CANON_ELEMENT(c) c
+#include "str-kmp.h"
+
+#define FUNC u32_strstr
+#define U_STRCHR u32_strchr
+#define U_STRLEN u32_strlen
+#define U_STRNLEN u32_strnlen
+#include "u-strstr.h"
diff --git a/gnulib/lib/unistr/u32-strtok.c b/gnulib/lib/unistr/u32-strtok.c
new file mode 100644
index 0000000..54c9fed
--- /dev/null
+++ b/gnulib/lib/unistr/u32-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-32 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_strtok
+#define UNIT uint32_t
+#define U_STRSPN u32_strspn
+#define U_STRPBRK u32_strpbrk
+#include "u-strtok.h"
diff --git a/gnulib/lib/unistr/u32-to-u16.c b/gnulib/lib/unistr/u32-to-u16.c
new file mode 100644
index 0000000..131807b
--- /dev/null
+++ b/gnulib/lib/unistr/u32-to-u16.c
@@ -0,0 +1,130 @@
+/* Convert UTF-32 string to UTF-16 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_to_u16
+#define SRC_UNIT uint32_t
+#define DST_UNIT uint16_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ uc = *s++;
+ /* No need to call the safe variant u32_mbtouc, because
+ u16_uctomb will verify uc anyway. */
+
+ /* Store it in the output string. */
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count == -1)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ if (count == -2)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 2 > allocated)
+ allocated = length + 2;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count < 0)
+ abort ();
+ }
+ length += count;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u32-to-u8.c b/gnulib/lib/unistr/u32-to-u8.c
new file mode 100644
index 0000000..3b2e1d2
--- /dev/null
+++ b/gnulib/lib/unistr/u32-to-u8.c
@@ -0,0 +1,130 @@
+/* Convert UTF-32 string to UTF-8 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u32_to_u8
+#define SRC_UNIT uint32_t
+#define DST_UNIT uint8_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ uc = *s++;
+ /* No need to call the safe variant u32_mbtouc, because
+ u8_uctomb will verify uc anyway. */
+
+ /* Store it in the output string. */
+ count = u8_uctomb (result + length, uc, allocated - length);
+ if (count == -1)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ if (count == -2)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 6 > allocated)
+ allocated = length + 6;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ count = u8_uctomb (result + length, uc, allocated - length);
+ if (count < 0)
+ abort ();
+ }
+ length += count;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u32-uctomb.c b/gnulib/lib/unistr/u32-uctomb.c
new file mode 100644
index 0000000..9342806
--- /dev/null
+++ b/gnulib/lib/unistr/u32-uctomb.c
@@ -0,0 +1,47 @@
+/* Store a character in UTF-32 string.
+ Copyright (C) 2002, 2005-2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u32_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u32_uctomb (uint32_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
+ {
+ if (n > 0)
+ {
+ *s = uc;
+ return 1;
+ }
+ else
+ return -2;
+ }
+ else
+ return -1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u8-check.c b/gnulib/lib/unistr/u8-check.c
new file mode 100644
index 0000000..29dc463
--- /dev/null
+++ b/gnulib/lib/unistr/u8-check.c
@@ -0,0 +1,105 @@
+/* Check UTF-8 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint8_t *
+u8_check (const uint8_t *s, size_t n)
+{
+ const uint8_t *s_end = s + n;
+
+ while (s < s_end)
+ {
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ s++;
+ continue;
+ }
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (s + 2 <= s_end
+ && (s[1] ^ 0x80) < 0x40)
+ {
+ s += 2;
+ continue;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (s + 3 <= s_end
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ s += 3;
+ continue;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (s + 4 <= s_end
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+ {
+ s += 4;
+ continue;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (s + 5 <= s_end
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+ {
+ s += 5;
+ continue;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (s + 6 <= s_end
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+ {
+ s += 6;
+ continue;
+ }
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return s;
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u8-chr.c b/gnulib/lib/unistr/u8-chr.c
new file mode 100644
index 0000000..0776d91
--- /dev/null
+++ b/gnulib/lib/unistr/u8-chr.c
@@ -0,0 +1,201 @@
+/* Search character in piece of UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
+{
+ if (uc < 0x80)
+ {
+ uint8_t c0 = uc;
+
+ return (uint8_t *) memchr ((const char *) s, c0, n);
+ }
+
+ {
+ uint8_t c[6];
+ size_t uc_size;
+ uc_size = u8_uctomb_aux (c, uc, 6);
+
+ if (n < uc_size)
+ return NULL;
+
+ /* For multibyte character matching we use a Boyer-Moore like
+ algorithm that searches for the last byte, skipping multi-byte
+ jumps, and matches back from there.
+
+ Instead of using a table as is usual for Boyer-Moore, we compare
+ the candidate last byte s[UC_SIZE-1] with each of the possible
+ bytes in the UTF-8 representation of UC. If the final byte does
+ not match, we will perform up to UC_SIZE comparisons per memory
+ load---but each comparison lets us skip one byte in the input!
+
+ If the final byte matches, the "real" Boyer-Moore algorithm
+ is approximated. Instead, u8_chr just looks for other cN that
+ are equal to the final byte and uses those to try realigning to
+ another possible match. For example, when searching for 0xF0
+ 0xAA 0xBB 0xAA it will always skip forward by two bytes, even if
+ the character in the string was for example 0xF1 0xAA 0xBB 0xAA.
+ The advantage of this scheme is that the skip count after a failed
+ match can be computed outside the loop, and that it keeps the
+ complexity low for a pretty rare case. In particular, since c[0]
+ is never between 0x80 and 0xBF, c[0] is never equal to c[UC_SIZE-1]
+ and this is optimal for two-byte UTF-8 characters. */
+ switch (uc_size)
+ {
+ case 2:
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ const uint8_t *end = s + n - 1;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..1] == { c0, c1 }. */
+ uint8_t s1 = s[1];
+ if (s1 == c1)
+ {
+ if (*s == c0)
+ return (uint8_t *) s;
+ else
+ /* Skip the search at s + 1, because s[1] = c1 < c0. */
+ s += 2;
+ }
+ else
+ {
+ if (s1 == c0)
+ s++;
+ else
+ /* Skip the search at s + 1, because s[1] != c0. */
+ s += 2;
+ }
+ }
+ while (s < end);
+ break;
+ }
+
+ case 3:
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ const uint8_t *end = s + n - 2;
+ size_t skip;
+
+ if (c2 == c1)
+ skip = 1;
+ else
+ skip = 3;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..2] == { c0, c1, c2 }. */
+ uint8_t s2 = s[2];
+ if (s2 == c2)
+ {
+ if (s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
+ s += skip;
+ }
+ else
+ {
+ if (s2 == c1)
+ s++;
+ else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
+ s += 2;
+ else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
+ s += 3;
+ }
+ }
+ while (s < end);
+ break;
+ }
+
+ case 4:
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+ const uint8_t *end = s + n - 3;
+ size_t skip;
+
+ if (c3 == c2)
+ skip = 1;
+ else if (c3 == c1)
+ skip = 2;
+ else
+ skip = 4;
+
+ do
+ {
+ /* Here s < end.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
+ uint8_t s3 = s[3];
+ if (s3 == c3)
+ {
+ if (s[2] == c2 && s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
+ s += skip;
+ }
+ else
+ {
+ if (s3 == c2)
+ s++;
+ else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
+ s += 2;
+ else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
+ s += 3;
+ else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
+ s += 4;
+ }
+ }
+ while (s < end);
+ break;
+ }
+ }
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u8-cmp.c b/gnulib/lib/unistr/u8-cmp.c
new file mode 100644
index 0000000..364e2c1
--- /dev/null
+++ b/gnulib/lib/unistr/u8-cmp.c
@@ -0,0 +1,30 @@
+/* Compare pieces of UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+int
+u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
+{
+ /* Use the fact that the UTF-8 encoding preserves lexicographic order. */
+ return memcmp ((const char *) s1, (const char *) s2, n);
+}
diff --git a/gnulib/lib/unistr/u8-cmp2.c b/gnulib/lib/unistr/u8-cmp2.c
new file mode 100644
index 0000000..0b640f4
--- /dev/null
+++ b/gnulib/lib/unistr/u8-cmp2.c
@@ -0,0 +1,28 @@
+/* Compare pieces of UTF-8 strings.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include "minmax.h"
+
+#define FUNC u8_cmp2
+#define UNIT uint8_t
+#define U_CMP u8_cmp
+#include "u-cmp2.h"
diff --git a/gnulib/lib/unistr/u8-cpy-alloc.c b/gnulib/lib/unistr/u8-cpy-alloc.c
new file mode 100644
index 0000000..2b67fdb
--- /dev/null
+++ b/gnulib/lib/unistr/u8-cpy-alloc.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_cpy_alloc
+#define UNIT uint8_t
+#include "u-cpy-alloc.h"
diff --git a/gnulib/lib/unistr/u8-cpy.c b/gnulib/lib/unistr/u8-cpy.c
new file mode 100644
index 0000000..ba56565
--- /dev/null
+++ b/gnulib/lib/unistr/u8-cpy.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_cpy
+#define UNIT uint8_t
+#include "u-cpy.h"
diff --git a/gnulib/lib/unistr/u8-endswith.c b/gnulib/lib/unistr/u8-endswith.c
new file mode 100644
index 0000000..eca82e2
--- /dev/null
+++ b/gnulib/lib/unistr/u8-endswith.c
@@ -0,0 +1,27 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_endswith
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_CMP u8_cmp
+#include "u-endswith.h"
diff --git a/gnulib/lib/unistr/u8-mblen.c b/gnulib/lib/unistr/u8-mblen.c
new file mode 100644
index 0000000..0652b41
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mblen.c
@@ -0,0 +1,99 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u8_mblen (const uint8_t *s, size_t n)
+{
+ if (n > 0)
+ {
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ return (c != 0 ? 1 : 0);
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40
+#endif
+ )
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0)
+#endif
+ )
+ return 3;
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+#endif
+ )
+ return 4;
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ )
+ return 5;
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6
+#if CONFIG_UNICODE_SAFETY
+ && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84)
+#endif
+ )
+ return 6;
+ }
+#endif
+ }
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u8-mbsnlen.c b/gnulib/lib/unistr/u8-mbsnlen.c
new file mode 100644
index 0000000..4d086a4
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbsnlen.c
@@ -0,0 +1,44 @@
+/* Count characters in UTF-8 string.
+ Copyright (C) 2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+size_t
+u8_mbsnlen (const uint8_t *s, size_t n)
+{
+ size_t characters;
+
+ characters = 0;
+ while (n > 0)
+ {
+ ucs4_t uc;
+ int count = u8_mbtoucr (&uc, s, n);
+ characters++;
+ if (count == -2)
+ break;
+ if (count < 0)
+ count = u8_mbtouc (&uc, s, n);
+ else if (count == 0)
+ count = 1;
+ s += count;
+ n -= count;
+ }
+ return characters;
+}
diff --git a/gnulib/lib/unistr/u8-mbtouc-aux.c b/gnulib/lib/unistr/u8-mbtouc-aux.c
new file mode 100644
index 0000000..1ac2957
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbtouc-aux.c
@@ -0,0 +1,240 @@
+/* Conversion UTF-8 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+int
+u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return 1;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u8-mbtouc-unsafe-aux.c b/gnulib/lib/unistr/u8-mbtouc-unsafe-aux.c
new file mode 100644
index 0000000..deea151
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbtouc-unsafe-aux.c
@@ -0,0 +1,260 @@
+/* Conversion UTF-8 to UCS-4.
+ Copyright (C) 2001-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if defined IN_LIBUNISTRING || HAVE_INLINE
+
+int
+u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return 1;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u8-mbtouc-unsafe.c b/gnulib/lib/unistr/u8-mbtouc-unsafe.c
new file mode 100644
index 0000000..b298925
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbtouc-unsafe.c
@@ -0,0 +1,271 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
+ 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return 1;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+#if CONFIG_UNICODE_SAFETY
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+#endif
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u8-mbtouc.c b/gnulib/lib/unistr/u8-mbtouc.c
new file mode 100644
index 0000000..a62539e
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbtouc.c
@@ -0,0 +1,250 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return 1;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 3)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else
+ return 2;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 4)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ if (n == 1 || (s[1] ^ 0x80) >= 0x40)
+ return 1;
+ else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
+ return 2;
+ else
+ return 3;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 5)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xf9 || s[1] >= 0x88)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 6)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ if (c >= 0xfd || s[1] >= 0x84)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 6;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 5;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 4;
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 3;
+ }
+ /* invalid multibyte character */
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return n;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return 1;
+}
+
+#endif
diff --git a/gnulib/lib/unistr/u8-mbtoucr.c b/gnulib/lib/unistr/u8-mbtoucr.c
new file mode 100644
index 0000000..ffcc134
--- /dev/null
+++ b/gnulib/lib/unistr/u8-mbtoucr.c
@@ -0,0 +1,285 @@
+/* Look at first character in UTF-8 string, returning an error code.
+ Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
+{
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return 1;
+ }
+ else if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xf0)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xf8)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if (n >= 5)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ else if (c < 0xfe)
+ {
+ if (n >= 2)
+ {
+ if ((s[1] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+ {
+ if (n >= 3)
+ {
+ if ((s[2] ^ 0x80) < 0x40)
+ {
+ if (n >= 4)
+ {
+ if ((s[3] ^ 0x80) < 0x40)
+ {
+ if (n >= 5)
+ {
+ if ((s[4] ^ 0x80) < 0x40)
+ {
+ if (n >= 6)
+ {
+ if ((s[5] ^ 0x80) < 0x40)
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+ /* invalid multibyte character */
+ }
+ else
+ {
+ /* incomplete multibyte character */
+ *puc = 0xfffd;
+ return -2;
+ }
+ }
+#endif
+ }
+ /* invalid multibyte character */
+ *puc = 0xfffd;
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u8-move.c b/gnulib/lib/unistr/u8-move.c
new file mode 100644
index 0000000..cdf5f59
--- /dev/null
+++ b/gnulib/lib/unistr/u8-move.c
@@ -0,0 +1,25 @@
+/* Copy piece of UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_move
+#define UNIT uint8_t
+#include "u-move.h"
diff --git a/gnulib/lib/unistr/u8-next.c b/gnulib/lib/unistr/u8-next.c
new file mode 100644
index 0000000..f6911f9
--- /dev/null
+++ b/gnulib/lib/unistr/u8-next.c
@@ -0,0 +1,37 @@
+/* Iterate over next character in UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint8_t *
+u8_next (ucs4_t *puc, const uint8_t *s)
+{
+ int count;
+
+ count = u8_strmbtouc (puc, s);
+ if (count > 0)
+ return s + count;
+ else
+ {
+ if (count < 0)
+ *puc = 0xfffd;
+ return NULL;
+ }
+}
diff --git a/gnulib/lib/unistr/u8-prev.c b/gnulib/lib/unistr/u8-prev.c
new file mode 100644
index 0000000..29118d6
--- /dev/null
+++ b/gnulib/lib/unistr/u8-prev.c
@@ -0,0 +1,93 @@
+/* Iterate over previous character in UTF-8 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+const uint8_t *
+u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
+{
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
+ if (s != start)
+ {
+ uint8_t c_1 = s[-1];
+
+ if (c_1 < 0x80)
+ {
+ *puc = c_1;
+ return s - 1;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_1 ^ 0x80) < 0x40)
+#endif
+ if (s - 1 != start)
+ {
+ uint8_t c_2 = s[-2];
+
+ if (c_2 >= 0xc2 && c_2 < 0xe0)
+ {
+ *puc = ((unsigned int) (c_2 & 0x1f) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 2;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_2 ^ 0x80) < 0x40)
+#endif
+ if (s - 2 != start)
+ {
+ uint8_t c_3 = s[-3];
+
+ if (c_3 >= 0xe0 && c_3 < 0xf0
+#if CONFIG_UNICODE_SAFETY
+ && (c_3 >= 0xe1 || c_2 >= 0xa0)
+ && (c_3 != 0xed || c_2 < 0xa0)
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c_3 & 0x0f) << 12)
+ | ((unsigned int) (c_2 ^ 0x80) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 3;
+ }
+#if CONFIG_UNICODE_SAFETY
+ if ((c_3 ^ 0x80) < 0x40)
+#endif
+ if (s - 3 != start)
+ {
+ uint8_t c_4 = s[-4];
+
+ if (c_4 >= 0xf0 && c_4 < 0xf8
+#if CONFIG_UNICODE_SAFETY
+ && (c_4 >= 0xf1 || c_3 >= 0x90)
+ && (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90))
+#endif
+ )
+ {
+ *puc = ((unsigned int) (c_4 & 0x07) << 18)
+ | ((unsigned int) (c_3 ^ 0x80) << 12)
+ | ((unsigned int) (c_2 ^ 0x80) << 6)
+ | (unsigned int) (c_1 ^ 0x80);
+ return s - 4;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u8-set.c b/gnulib/lib/unistr/u8-set.c
new file mode 100644
index 0000000..2b7cbb5
--- /dev/null
+++ b/gnulib/lib/unistr/u8-set.c
@@ -0,0 +1,44 @@
+/* Fill UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_set
+#define UNIT uint8_t
+#define IS_SINGLE_UNIT(uc) (uc < 0x80)
+
+#include <errno.h>
+#include <string.h>
+
+UNIT *
+FUNC (UNIT *s, ucs4_t uc, size_t n)
+{
+ if (n > 0)
+ {
+ if (IS_SINGLE_UNIT (uc))
+ memset ((char *) s, uc, n);
+ else
+ {
+ errno = EILSEQ;
+ return NULL;
+ }
+ }
+ return s;
+}
diff --git a/gnulib/lib/unistr/u8-startswith.c b/gnulib/lib/unistr/u8-startswith.c
new file mode 100644
index 0000000..2190cb1
--- /dev/null
+++ b/gnulib/lib/unistr/u8-startswith.c
@@ -0,0 +1,25 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_startswith
+#define UNIT uint8_t
+#include "u-startswith.h"
diff --git a/gnulib/lib/unistr/u8-stpcpy.c b/gnulib/lib/unistr/u8-stpcpy.c
new file mode 100644
index 0000000..97386e8
--- /dev/null
+++ b/gnulib/lib/unistr/u8-stpcpy.c
@@ -0,0 +1,44 @@
+/* Copy UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure stpcpy() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if HAVE_STPCPY
+
+# include <string.h>
+
+uint8_t *
+u8_stpcpy (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) stpcpy ((char *) dest, (const char *) src);
+}
+
+#else
+
+# define FUNC u8_stpcpy
+# define UNIT uint8_t
+# include "u-stpcpy.h"
+
+#endif
diff --git a/gnulib/lib/unistr/u8-stpncpy.c b/gnulib/lib/unistr/u8-stpncpy.c
new file mode 100644
index 0000000..21698b1
--- /dev/null
+++ b/gnulib/lib/unistr/u8-stpncpy.c
@@ -0,0 +1,44 @@
+/* Copy UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure stpncpy() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if __GLIBC__ >= 2 && !defined __UCLIBC__
+
+# include <string.h>
+
+uint8_t *
+u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) stpncpy ((char *) dest, (const char *) src, n);
+}
+
+#else
+
+# define FUNC u8_stpncpy
+# define UNIT uint8_t
+# include "u-stpncpy.h"
+
+#endif
diff --git a/gnulib/lib/unistr/u8-strcat.c b/gnulib/lib/unistr/u8-strcat.c
new file mode 100644
index 0000000..f569426
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strcat.c
@@ -0,0 +1,29 @@
+/* Concatenate UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_strcat (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) strcat ((char *) dest, (const char *) src);
+}
diff --git a/gnulib/lib/unistr/u8-strchr.c b/gnulib/lib/unistr/u8-strchr.c
new file mode 100644
index 0000000..0bfbc10
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strchr.c
@@ -0,0 +1,240 @@
+/* Search character in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_strchr (const uint8_t *s, ucs4_t uc)
+{
+ uint8_t c[6];
+
+ if (uc < 0x80)
+ {
+ uint8_t c0 = uc;
+
+ if (false)
+ {
+ /* Unoptimized code. */
+ for (;;)
+ {
+ uint8_t s0 = *s;
+ if (s0 == c0)
+ return (uint8_t *) s;
+ s++;
+ if (s0 == 0)
+ break;
+ }
+ }
+ else
+ {
+ /* Optimized code.
+ strchr() is often so well optimized, that it's worth the
+ added function call. */
+ return (uint8_t *) strchr ((const char *) s, c0);
+ }
+ }
+ else
+ /* Loops equivalent to strstr, optimized for a specific length (2, 3, 4)
+ of the needle. We use an algorithm similar to Boyer-Moore which
+ is documented in lib/unistr/u8-chr.c. There is additional
+ complication because we need to check after every byte for
+ a NUL byte, but the idea is the same. */
+ switch (u8_uctomb_aux (c, uc, 6))
+ {
+ case 2:
+ if (*s == 0 || s[1] == 0)
+ break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ /* Search for { c0, c1 }. */
+ uint8_t s1 = s[1];
+
+ for (;;)
+ {
+ /* Here s[0] != 0, s[1] != 0.
+ Test whether s[0..1] == { c0, c1 }. */
+ if (s1 == c1)
+ {
+ if (*s == c0)
+ return (uint8_t *) s;
+ else
+ /* Skip the search at s + 1, because s[1] = c1 < c0. */
+ goto case2_skip2;
+ }
+ else
+ {
+ if (s1 == c0)
+ goto case2_skip1;
+ else
+ /* Skip the search at s + 1, because s[1] != c0. */
+ goto case2_skip2;
+ }
+ case2_skip2:
+ s++;
+ s1 = s[1];
+ if (s[1] == 0)
+ break;
+ case2_skip1:
+ s++;
+ s1 = s[1];
+ if (s[1] == 0)
+ break;
+ }
+ }
+ break;
+
+ case 3:
+ if (*s == 0 || s[1] == 0 || s[2] == 0)
+ break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ /* Search for { c0, c1, c2 }. */
+ uint8_t s2 = s[2];
+
+ for (;;)
+ {
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0.
+ Test whether s[0..2] == { c0, c1, c2 }. */
+ if (s2 == c2)
+ {
+ if (s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
+ if (c2 == c1)
+ goto case3_skip1;
+ else
+ goto case3_skip3;
+ }
+ else
+ {
+ if (s2 == c1)
+ goto case3_skip1;
+ else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
+ goto case3_skip2;
+ else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
+ goto case3_skip3;
+ }
+ case3_skip3:
+ s++;
+ s2 = s[2];
+ if (s[2] == 0)
+ break;
+ case3_skip2:
+ s++;
+ s2 = s[2];
+ if (s[2] == 0)
+ break;
+ case3_skip1:
+ s++;
+ s2 = s[2];
+ if (s[2] == 0)
+ break;
+ }
+ }
+ break;
+
+ case 4:
+ if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
+ break;
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+ /* Search for { c0, c1, c2, c3 }. */
+ uint8_t s3 = s[3];
+
+ for (;;)
+ {
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
+ if (s3 == c3)
+ {
+ if (s[2] == c2 && s[1] == c1 && *s == c0)
+ return (uint8_t *) s;
+ else
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
+ if (c3 == c2)
+ goto case4_skip1;
+ else if (c3 == c1)
+ goto case4_skip2;
+ else
+ goto case4_skip4;
+ }
+ else
+ {
+ if (s3 == c2)
+ goto case4_skip1;
+ else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
+ goto case4_skip2;
+ else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
+ goto case4_skip3;
+ else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
+ goto case4_skip4;
+ }
+ case4_skip4:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip3:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip2:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ case4_skip1:
+ s++;
+ s3 = s[3];
+ if (s[3] == 0)
+ break;
+ }
+ }
+ break;
+ }
+
+ return NULL;
+}
diff --git a/gnulib/lib/unistr/u8-strcmp.c b/gnulib/lib/unistr/u8-strcmp.c
new file mode 100644
index 0000000..c4968b7
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strcmp.c
@@ -0,0 +1,30 @@
+/* Compare UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+int
+u8_strcmp (const uint8_t *s1, const uint8_t *s2)
+{
+ /* Use the fact that the UTF-8 encoding preserves lexicographic order. */
+ return strcmp ((const char *) s1, (const char *) s2);
+}
diff --git a/gnulib/lib/unistr/u8-strcoll.c b/gnulib/lib/unistr/u8-strcoll.c
new file mode 100644
index 0000000..6918aa6
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strcoll.c
@@ -0,0 +1,33 @@
+/* Compare UTF-8 strings using the collation rules of the current locale.
+ Copyright (C) 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uniconv.h"
+
+#define FUNC u8_strcoll
+#define UNIT uint8_t
+#define U_STRCMP u8_strcmp
+#define U_STRCONV_TO_ENCODING u8_strconv_to_encoding
+#include "u-strcoll.h"
diff --git a/gnulib/lib/unistr/u8-strcpy.c b/gnulib/lib/unistr/u8-strcpy.c
new file mode 100644
index 0000000..39fcb99
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strcpy.c
@@ -0,0 +1,29 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_strcpy (uint8_t *dest, const uint8_t *src)
+{
+ return (uint8_t *) strcpy ((char *) dest, (const char *) src);
+}
diff --git a/gnulib/lib/unistr/u8-strcspn.c b/gnulib/lib/unistr/u8-strcspn.c
new file mode 100644
index 0000000..96d18e7
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strcspn.c
@@ -0,0 +1,28 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_strcspn
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_STRMBTOUC u8_strmbtouc
+#define U_STRCHR u8_strchr
+#include "u-strcspn.h"
diff --git a/gnulib/lib/unistr/u8-strdup.c b/gnulib/lib/unistr/u8-strdup.c
new file mode 100644
index 0000000..7db00ba
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strdup.c
@@ -0,0 +1,40 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if HAVE_STRDUP
+
+# include <string.h>
+
+uint8_t *
+u8_strdup (const uint8_t *s)
+{
+ return (uint8_t *) strdup ((const char *) s);
+}
+
+#else
+
+# define FUNC u8_strdup
+# define UNIT uint8_t
+# define U_STRLEN u8_strlen
+# include "u-strdup.h"
+
+#endif
diff --git a/gnulib/lib/unistr/u8-strlen.c b/gnulib/lib/unistr/u8-strlen.c
new file mode 100644
index 0000000..158213f
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strlen.c
@@ -0,0 +1,29 @@
+/* Determine length of UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+size_t
+u8_strlen (const uint8_t *s)
+{
+ return strlen ((const char *) s);
+}
diff --git a/gnulib/lib/unistr/u8-strmblen.c b/gnulib/lib/unistr/u8-strmblen.c
new file mode 100644
index 0000000..ecf5d96
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strmblen.c
@@ -0,0 +1,97 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u8_strmblen (const uint8_t *s)
+{
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ return (c != 0 ? 1 : 0);
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#else
+ if (s[1] != 0)
+#endif
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#else
+ if (s[1] != 0 && s[2] != 0)
+#endif
+ return 3;
+ }
+ else if (c < 0xf8)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0)
+#endif
+ return 4;
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0)
+#endif
+ return 5;
+ }
+ else if (c < 0xfe)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0)
+#endif
+ return 6;
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u8-strmbtouc.c b/gnulib/lib/unistr/u8-strmbtouc.c
new file mode 100644
index 0000000..e15a7a1
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strmbtouc.c
@@ -0,0 +1,130 @@
+/* Look at first character in UTF-8 string.
+ Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2011 Free Software
+ Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
+{
+ /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
+ uint8_t c = *s;
+
+ if (c < 0x80)
+ {
+ *puc = c;
+ return (c != 0 ? 1 : 0);
+ }
+ if (c >= 0xc2)
+ {
+ if (c < 0xe0)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40)
+#else
+ if (s[1] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x1f) << 6)
+ | (unsigned int) (s[1] ^ 0x80);
+ return 2;
+ }
+ }
+ else if (c < 0xf0)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (c >= 0xe1 || s[1] >= 0xa0)
+ && (c != 0xed || s[1] < 0xa0))
+#else
+ if (s[1] != 0 && s[2] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x0f) << 12)
+ | ((unsigned int) (s[1] ^ 0x80) << 6)
+ | (unsigned int) (s[2] ^ 0x80);
+ return 3;
+ }
+ }
+ else if (c < 0xf8)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40
+ && (c >= 0xf1 || s[1] >= 0x90)
+#if 1
+ && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
+#endif
+ )
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x07) << 18)
+ | ((unsigned int) (s[1] ^ 0x80) << 12)
+ | ((unsigned int) (s[2] ^ 0x80) << 6)
+ | (unsigned int) (s[3] ^ 0x80);
+ return 4;
+ }
+ }
+#if 0
+ else if (c < 0xfc)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (c >= 0xf9 || s[1] >= 0x88))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x03) << 24)
+ | ((unsigned int) (s[1] ^ 0x80) << 18)
+ | ((unsigned int) (s[2] ^ 0x80) << 12)
+ | ((unsigned int) (s[3] ^ 0x80) << 6)
+ | (unsigned int) (s[4] ^ 0x80);
+ return 5;
+ }
+ }
+ else if (c < 0xfe)
+ {
+#if CONFIG_UNICODE_SAFETY
+ if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
+ && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
+ && (s[5] ^ 0x80) < 0x40
+ && (c >= 0xfd || s[1] >= 0x84))
+#else
+ if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0)
+#endif
+ {
+ *puc = ((unsigned int) (c & 0x01) << 30)
+ | ((unsigned int) (s[1] ^ 0x80) << 24)
+ | ((unsigned int) (s[2] ^ 0x80) << 18)
+ | ((unsigned int) (s[3] ^ 0x80) << 12)
+ | ((unsigned int) (s[4] ^ 0x80) << 6)
+ | (unsigned int) (s[5] ^ 0x80);
+ return 6;
+ }
+ }
+#endif
+ }
+ /* invalid or incomplete multibyte character */
+ return -1;
+}
diff --git a/gnulib/lib/unistr/u8-strncat.c b/gnulib/lib/unistr/u8-strncat.c
new file mode 100644
index 0000000..90ac4e0
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strncat.c
@@ -0,0 +1,29 @@
+/* Concatenate UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_strncat (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) strncat ((char *) dest, (const char *) src, n);
+}
diff --git a/gnulib/lib/unistr/u8-strncmp.c b/gnulib/lib/unistr/u8-strncmp.c
new file mode 100644
index 0000000..036d899
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strncmp.c
@@ -0,0 +1,30 @@
+/* Compare UTF-8 strings.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+int
+u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
+{
+ /* Use the fact that the UTF-8 encoding preserves lexicographic order. */
+ return strncmp ((const char *) s1, (const char *) s2, n);
+}
diff --git a/gnulib/lib/unistr/u8-strncpy.c b/gnulib/lib/unistr/u8-strncpy.c
new file mode 100644
index 0000000..040d3a0
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strncpy.c
@@ -0,0 +1,29 @@
+/* Copy UTF-8 string.
+ Copyright (C) 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+uint8_t *
+u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n)
+{
+ return (uint8_t *) strncpy ((char *) dest, (const char *) src, n);
+}
diff --git a/gnulib/lib/unistr/u8-strnlen.c b/gnulib/lib/unistr/u8-strnlen.c
new file mode 100644
index 0000000..b82af62
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strnlen.c
@@ -0,0 +1,44 @@
+/* Determine bounded length of UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Ensure strnlen() gets declared. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#if __GLIBC__ >= 2 || defined __UCLIBC__
+
+# include <string.h>
+
+size_t
+u8_strnlen (const uint8_t *s, size_t maxlen)
+{
+ return strnlen ((const char *) s, maxlen);
+}
+
+#else
+
+# define FUNC u8_strnlen
+# define UNIT uint8_t
+# include "u-strnlen.h"
+
+#endif
diff --git a/gnulib/lib/unistr/u8-strpbrk.c b/gnulib/lib/unistr/u8-strpbrk.c
new file mode 100644
index 0000000..b7d38f7
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strpbrk.c
@@ -0,0 +1,27 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_strpbrk
+#define UNIT uint8_t
+#define U_STRMBTOUC u8_strmbtouc
+#define U_STRCHR u8_strchr
+#include "u-strpbrk.h"
diff --git a/gnulib/lib/unistr/u8-strrchr.c b/gnulib/lib/unistr/u8-strrchr.c
new file mode 100644
index 0000000..38beebe
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strrchr.c
@@ -0,0 +1,102 @@
+/* Search character in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006-2007, 2009-2011 Free Software Foundation,
+ Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+uint8_t *
+u8_strrchr (const uint8_t *s, ucs4_t uc)
+{
+ /* Calling u8_strlen and then searching from the other end would cause more
+ memory accesses. Avoid that, at the cost of a few more comparisons. */
+ uint8_t *result = NULL;
+ uint8_t c[6];
+
+ if (uc < 0x80)
+ {
+ uint8_t c0 = uc;
+
+ for (;; s++)
+ {
+ if (*s == c0)
+ result = (uint8_t *) s;
+ if (*s == 0)
+ break;
+ }
+ }
+ else
+ switch (u8_uctomb_aux (c, uc, 6))
+ {
+ case 2:
+ if (*s)
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[1] == 0)
+ break;
+ if (*s == c0 && s[1] == c1)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+
+ case 3:
+ if (*s && s[1])
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[2] == 0)
+ break;
+ if (*s == c0 && s[1] == c1 && s[2] == c2)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+
+ case 4:
+ if (*s && s[1] && s[2])
+ {
+ uint8_t c0 = c[0];
+ uint8_t c1 = c[1];
+ uint8_t c2 = c[2];
+ uint8_t c3 = c[3];
+
+ /* FIXME: Maybe walking the string via u8_mblen is a win? */
+ for (;; s++)
+ {
+ if (s[3] == 0)
+ break;
+ if (*s == c0 && s[1] == c1 && s[2] == c2 && s[3] == c3)
+ result = (uint8_t *) s;
+ }
+ }
+ break;
+ }
+ return result;
+}
diff --git a/gnulib/lib/unistr/u8-strspn.c b/gnulib/lib/unistr/u8-strspn.c
new file mode 100644
index 0000000..8842557
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strspn.c
@@ -0,0 +1,29 @@
+/* Search for some characters in UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_strspn
+#define UNIT uint8_t
+#define U_STRLEN u8_strlen
+#define U_STRMBTOUC u8_strmbtouc
+#define U_CMP u8_cmp
+#define U_STRCHR u8_strchr
+#include "u-strspn.h"
diff --git a/gnulib/lib/unistr/u8-strstr.c b/gnulib/lib/unistr/u8-strstr.c
new file mode 100644
index 0000000..3f725cb
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strstr.c
@@ -0,0 +1,32 @@
+/* Substring test for UTF-8 strings.
+ Copyright (C) 1999, 2002, 2006, 2010-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#include <string.h>
+
+/* FIXME: Maybe walking the string via u8_mblen is a win? */
+
+#define FUNC u8_strstr
+#define UNIT uint8_t
+#define U_STRCHR u8_strchr
+#define U_STRMBTOUC u8_strmbtouc
+#define UNIT_IS_UINT8_T 1
+#include "u-strstr.h"
diff --git a/gnulib/lib/unistr/u8-strtok.c b/gnulib/lib/unistr/u8-strtok.c
new file mode 100644
index 0000000..7f6c1ec
--- /dev/null
+++ b/gnulib/lib/unistr/u8-strtok.c
@@ -0,0 +1,27 @@
+/* Tokenize UTF-8 string.
+ Copyright (C) 1999, 2002, 2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_strtok
+#define UNIT uint8_t
+#define U_STRSPN u8_strspn
+#define U_STRPBRK u8_strpbrk
+#include "u-strtok.h"
diff --git a/gnulib/lib/unistr/u8-to-u16.c b/gnulib/lib/unistr/u8-to-u16.c
new file mode 100644
index 0000000..00426e6
--- /dev/null
+++ b/gnulib/lib/unistr/u8-to-u16.c
@@ -0,0 +1,136 @@
+/* Convert UTF-8 string to UTF-16 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_to_u16
+#define SRC_UNIT uint8_t
+#define DST_UNIT uint16_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u8_mbtoucr (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count == -1)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ if (count == -2)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 2 > allocated)
+ allocated = length + 2;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ count = u16_uctomb (result + length, uc, allocated - length);
+ if (count < 0)
+ abort ();
+ }
+ length += count;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u8-to-u32.c b/gnulib/lib/unistr/u8-to-u32.c
new file mode 100644
index 0000000..f208975
--- /dev/null
+++ b/gnulib/lib/unistr/u8-to-u32.c
@@ -0,0 +1,125 @@
+/* Convert UTF-8 string to UTF-32 string.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+#define FUNC u8_to_u32
+#define SRC_UNIT uint8_t
+#define DST_UNIT uint32_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+ const SRC_UNIT *s_end = s + n;
+ /* Output string accumulator. */
+ DST_UNIT *result;
+ size_t allocated;
+ size_t length;
+
+ if (resultbuf != NULL)
+ {
+ result = resultbuf;
+ allocated = *lengthp;
+ }
+ else
+ {
+ result = NULL;
+ allocated = 0;
+ }
+ length = 0;
+ /* Invariants:
+ result is either == resultbuf or == NULL or malloc-allocated.
+ If length > 0, then result != NULL. */
+
+ while (s < s_end)
+ {
+ ucs4_t uc;
+ int count;
+
+ /* Fetch a Unicode character from the input string. */
+ count = u8_mbtoucr (&uc, s, s_end - s);
+ if (count < 0)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = EILSEQ;
+ return NULL;
+ }
+ s += count;
+
+ /* Store it in the output string. */
+ if (length + 1 > allocated)
+ {
+ DST_UNIT *memory;
+
+ allocated = (allocated > 0 ? 2 * allocated : 12);
+ if (length + 1 > allocated)
+ allocated = length + 1;
+ if (result == resultbuf || result == NULL)
+ memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+ else
+ memory =
+ (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+ if (memory == NULL)
+ {
+ if (!(result == resultbuf || result == NULL))
+ free (result);
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (result == resultbuf && length > 0)
+ memcpy ((char *) memory, (char *) result,
+ length * sizeof (DST_UNIT));
+ result = memory;
+ }
+ result[length++] = uc;
+ }
+
+ if (length == 0)
+ {
+ if (result == NULL)
+ {
+ /* Return a non-NULL value. NULL means error. */
+ result = (DST_UNIT *) malloc (1);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ }
+ }
+ else if (result != resultbuf && length < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ DST_UNIT *memory;
+
+ memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+ if (memory != NULL)
+ result = memory;
+ }
+
+ *lengthp = length;
+ return result;
+}
diff --git a/gnulib/lib/unistr/u8-uctomb-aux.c b/gnulib/lib/unistr/u8-uctomb-aux.c
new file mode 100644
index 0000000..2014c8c
--- /dev/null
+++ b/gnulib/lib/unistr/u8-uctomb-aux.c
@@ -0,0 +1,69 @@
+/* Conversion UCS-4 to UTF-8.
+ Copyright (C) 2002, 2006-2007, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unistr.h"
+
+int
+u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n)
+{
+ int count;
+
+ if (uc < 0x80)
+ /* The case n >= 1 is already handled by the caller. */
+ return -2;
+ else if (uc < 0x800)
+ count = 2;
+ else if (uc < 0x10000)
+ {
+ if (uc < 0xd800 || uc >= 0xe000)
+ count = 3;
+ else
+ return -1;
+ }
+#if 0
+ else if (uc < 0x200000)
+ count = 4;
+ else if (uc < 0x4000000)
+ count = 5;
+ else if (uc <= 0x7fffffff)
+ count = 6;
+#else
+ else if (uc < 0x110000)
+ count = 4;
+#endif
+ else
+ return -1;
+
+ if (n < count)
+ return -2;
+
+ switch (count) /* note: code falls through cases! */
+ {
+#if 0
+ case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
+ case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
+#endif
+ case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
+ case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
+ case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
+ /*case 1:*/ s[0] = uc;
+ }
+ return count;
+}
diff --git a/gnulib/lib/unistr/u8-uctomb.c b/gnulib/lib/unistr/u8-uctomb.c
new file mode 100644
index 0000000..65a4053
--- /dev/null
+++ b/gnulib/lib/unistr/u8-uctomb.c
@@ -0,0 +1,88 @@
+/* Store a character in UTF-8 string.
+ Copyright (C) 2002, 2005-2006, 2009-2011 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#if defined IN_LIBUNISTRING
+/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */
+# include "unistring-notinline.h"
+#endif
+
+/* Specification. */
+#include "unistr.h"
+
+#if !HAVE_INLINE
+
+int
+u8_uctomb (uint8_t *s, ucs4_t uc, int n)
+{
+ if (uc < 0x80)
+ {
+ if (n > 0)
+ {
+ s[0] = uc;
+ return 1;
+ }
+ /* else return -2, below. */
+ }
+ else
+ {
+ int count;
+
+ if (uc < 0x800)
+ count = 2;
+ else if (uc < 0x10000)
+ {
+ if (uc < 0xd800 || uc >= 0xe000)
+ count = 3;
+ else
+ return -1;
+ }
+#if 0
+ else if (uc < 0x200000)
+ count = 4;
+ else if (uc < 0x4000000)
+ count = 5;
+ else if (uc <= 0x7fffffff)
+ count = 6;
+#else
+ else if (uc < 0x110000)
+ count = 4;
+#endif
+ else
+ return -1;
+
+ if (n >= count)
+ {
+ switch (count) /* note: code falls through cases! */
+ {
+#if 0
+ case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
+ case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
+#endif
+ case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
+ case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
+ case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
+ /*case 1:*/ s[0] = uc;
+ }
+ return count;
+ }
+ }
+ return -2;
+}
+
+#endif