summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog94
-rw-r--r--lib/unigbrk.in.h45
-rw-r--r--lib/unigbrk/u16-grapheme-breaks.c45
-rw-r--r--lib/unigbrk/u16-grapheme-len.c47
-rw-r--r--lib/unigbrk/u16-grapheme-next.c46
-rw-r--r--lib/unigbrk/u16-grapheme-prev.c55
-rw-r--r--lib/unigbrk/u32-grapheme-breaks.c42
-rw-r--r--lib/unigbrk/u32-grapheme-len.c47
-rw-r--r--lib/unigbrk/u32-grapheme-next.c46
-rw-r--r--lib/unigbrk/u32-grapheme-prev.c51
-rw-r--r--lib/unigbrk/u8-grapheme-breaks.c46
-rw-r--r--lib/unigbrk/u8-grapheme-len.c47
-rw-r--r--lib/unigbrk/u8-grapheme-next.c46
-rw-r--r--lib/unigbrk/u8-grapheme-prev.c55
-rw-r--r--lib/unigbrk/ulc-grapheme-breaks.c130
-rw-r--r--m4/locale-ar.m469
-rw-r--r--modules/unigbrk/u16-grapheme-breaks28
-rw-r--r--modules/unigbrk/u16-grapheme-breaks-tests12
-rw-r--r--modules/unigbrk/u16-grapheme-len28
-rw-r--r--modules/unigbrk/u16-grapheme-len-tests12
-rw-r--r--modules/unigbrk/u16-grapheme-next28
-rw-r--r--modules/unigbrk/u16-grapheme-next-tests12
-rw-r--r--modules/unigbrk/u16-grapheme-prev28
-rw-r--r--modules/unigbrk/u16-grapheme-prev-tests12
-rw-r--r--modules/unigbrk/u32-grapheme-breaks28
-rw-r--r--modules/unigbrk/u32-grapheme-breaks-tests12
-rw-r--r--modules/unigbrk/u32-grapheme-len28
-rw-r--r--modules/unigbrk/u32-grapheme-len-tests12
-rw-r--r--modules/unigbrk/u32-grapheme-next28
-rw-r--r--modules/unigbrk/u32-grapheme-next-tests12
-rw-r--r--modules/unigbrk/u32-grapheme-prev28
-rw-r--r--modules/unigbrk/u32-grapheme-prev-tests12
-rw-r--r--modules/unigbrk/u8-grapheme-breaks28
-rw-r--r--modules/unigbrk/u8-grapheme-breaks-tests12
-rw-r--r--modules/unigbrk/u8-grapheme-len28
-rw-r--r--modules/unigbrk/u8-grapheme-len-tests12
-rw-r--r--modules/unigbrk/u8-grapheme-next28
-rw-r--r--modules/unigbrk/u8-grapheme-next-tests12
-rw-r--r--modules/unigbrk/u8-grapheme-prev28
-rw-r--r--modules/unigbrk/u8-grapheme-prev-tests12
-rw-r--r--modules/unigbrk/ulc-grapheme-breaks30
-rw-r--r--modules/unigbrk/ulc-grapheme-breaks-tests18
-rw-r--r--tests/unigbrk/test-u16-grapheme-breaks.c105
-rw-r--r--tests/unigbrk/test-u16-grapheme-len.c95
-rw-r--r--tests/unigbrk/test-u16-grapheme-next.c102
-rw-r--r--tests/unigbrk/test-u16-grapheme-prev.c104
-rw-r--r--tests/unigbrk/test-u32-grapheme-breaks.c105
-rw-r--r--tests/unigbrk/test-u32-grapheme-len.c96
-rw-r--r--tests/unigbrk/test-u32-grapheme-next.c103
-rw-r--r--tests/unigbrk/test-u32-grapheme-prev.c105
-rw-r--r--tests/unigbrk/test-u8-grapheme-breaks.c96
-rw-r--r--tests/unigbrk/test-u8-grapheme-len.c52
-rw-r--r--tests/unigbrk/test-u8-grapheme-next.c78
-rw-r--r--tests/unigbrk/test-u8-grapheme-prev.c79
-rw-r--r--tests/unigbrk/test-ulc-grapheme-breaks.c86
-rwxr-xr-xtests/unigbrk/test-ulc-grapheme-breaks.sh15
56 files changed, 2660 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 97214e7d4e..7acf4b618b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,99 @@
2010-12-31 Ben Pfaff <blp@cs.stanford.edu>
+ New module 'u8-grapheme-len'.
+ * modules/unigbrk/u8-grapheme-len: New file.
+ * modules/unigbrk/u8-grapheme-len-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u8-grapheme-len.c: New file.
+ * tests/unigbrk/test-u8-grapheme-len.c: New file.
+
+ New module 'u16-grapheme-len'.
+ * modules/unigbrk/u16-grapheme-len: New file.
+ * modules/unigbrk/u16-grapheme-len-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u16-grapheme-len.c: New file.
+ * tests/unigbrk/test-u16-grapheme-len.c: New file.
+
+ New module 'u32-grapheme-len'.
+ * modules/unigbrk/u32-grapheme-len: New file.
+ * modules/unigbrk/u32-grapheme-len-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u32-grapheme-len.c: New file.
+ * tests/unigbrk/test-u32-grapheme-len.c: New file.
+
+ New module 'u8-grapheme-next'.
+ * modules/unigbrk/u8-grapheme-next: New file.
+ * modules/unigbrk/u8-grapheme-next-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u8-grapheme-next.c: New file.
+ * tests/unigbrk/test-u8-grapheme-next.c: New file.
+
+ New module 'u16-grapheme-next'.
+ * modules/unigbrk/u16-grapheme-next: New file.
+ * modules/unigbrk/u16-grapheme-next-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u16-grapheme-next.c: New file.
+ * tests/unigbrk/test-u16-grapheme-next.c: New file.
+
+ New module 'u32-grapheme-next'.
+ * modules/unigbrk/u32-grapheme-next: New file.
+ * modules/unigbrk/u32-grapheme-next-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u32-grapheme-next.c: New file.
+ * tests/unigbrk/test-u32-grapheme-next.c: New file.
+
+ New module 'u8-grapheme-prev'.
+ * modules/unigbrk/u8-grapheme-prev: New file.
+ * modules/unigbrk/u8-grapheme-prev-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u8-grapheme-prev.c: New file.
+ * tests/unigbrk/test-u8-grapheme-prev.c: New file.
+
+ New module 'u16-grapheme-prev'.
+ * modules/unigbrk/u16-grapheme-prev: New file.
+ * modules/unigbrk/u16-grapheme-prev-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u16-grapheme-prev.c: New file.
+ * tests/unigbrk/test-u16-grapheme-prev.c: New file.
+
+ New module 'u32-grapheme-prev'.
+ * modules/unigbrk/u32-grapheme-prev: New file.
+ * modules/unigbrk/u32-grapheme-prev-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u32-grapheme-prev.c: New file.
+ * tests/unigbrk/test-u32-grapheme-prev.c: New file.
+
+ New module 'u8-grapheme-breaks'.
+ * modules/unigbrk/u8-grapheme-breaks: New file.
+ * modules/unigbrk/u8-grapheme-breaks-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u8-grapheme-breaks.c: New file.
+ * tests/unigbrk/test-u8-grapheme-breaks.c: New file.
+
+ New module 'u16-grapheme-breaks'.
+ * modules/unigbrk/u16-grapheme-breaks: New file.
+ * modules/unigbrk/u16-grapheme-breaks-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u16-grapheme-breaks.c: New file.
+ * tests/unigbrk/test-u16-grapheme-breaks.c: New file.
+
+ New module 'u32-grapheme-breaks'.
+ * modules/unigbrk/u32-grapheme-breaks: New file.
+ * modules/unigbrk/u32-grapheme-breaks-tests: New file.
+ * lib/unigbrk.in.h: Add prototype for new function.
+ * lib/unigbrk/u32-grapheme-breaks.c: New file.
+ * tests/unigbrk/test-u32-grapheme-breaks.c: New file.
+
+ New module 'ulc-grapheme-breaks'.
+ * modules/unigbrk/ulc-grapheme-breaks: New file.
+ * modules/unigbrk/ulc-grapheme-breaks-tests: New file.
+ * m4/locale-ar.m4: New file.
+ * lib/unigbrk/ulc-grapheme-breaks.c: New file.
+ * tests/unigbrk/test-ulc-grapheme-breaks.c: New file.
+ * tests/unigbrk/test-ulc-grapheme-breaks.sh: New file.
+
+2010-12-31 Ben Pfaff <blp@cs.stanford.edu>
+
gbrkprop: Fix implementation of uc_graphemeclusterbreak_property.
* lib/unigbrk/gbrkprop.h: Regenerate with gen-uni-tables.c. I had
modified how this file was generated before I initially submitted
diff --git a/lib/unigbrk.in.h b/lib/unigbrk.in.h
index c6056b3760..5310d6fc6f 100644
--- a/lib/unigbrk.in.h
+++ b/lib/unigbrk.in.h
@@ -21,6 +21,9 @@
/* Get bool. */
#include <stdbool.h>
+/* Get size_t. */
+#include <stddef.h>
+
#include "unitypes.h"
#ifdef __cplusplus
@@ -75,6 +78,48 @@ extern int
extern bool
uc_is_grapheme_cluster_break (ucs4_t a, ucs4_t b);
+/* Returns the length (in units) of the first grapheme cluster in the N units
+ in S. If the return value is N, then extending the string could extend the
+ length of the grapheme cluster too. Returns 0 only if N is zero. */
+extern size_t
+ u8_grapheme_len (const uint8_t *s, size_t n);
+extern size_t
+ u16_grapheme_len (const uint16_t *s, size_t n);
+extern size_t
+ u32_grapheme_len (const uint32_t *s, size_t n);
+
+/* Returns the start of the next grapheme cluster following S, or NULL if the
+ end of the string has been reached. */
+extern const uint8_t *
+ u8_grapheme_next (const uint8_t *s, const uint8_t *end);
+extern const uint16_t *
+ u16_grapheme_next (const uint16_t *s, const uint16_t *end);
+extern const uint32_t *
+ u32_grapheme_next (const uint32_t *s, const uint32_t *end);
+
+/* Returns the start of the previous grapheme cluster before S, or NULL if the
+ start of the string has been reached. */
+extern const uint8_t *
+ u8_grapheme_prev (const uint8_t *s, const uint8_t *start);
+extern const uint16_t *
+ u16_grapheme_prev (const uint16_t *s, const uint16_t *start);
+extern const uint32_t *
+ u32_grapheme_prev (const uint32_t *s, const uint32_t *start);
+
+/* Determine the grapheme cluster boundaries in S, and store the result at
+ p[0..n-1]. p[i] = 1 means that a new grapheme cluster begins at s[i]. p[i]
+ = 0 means that s[i-1] and s[i] are part of the same grapheme cluster. p[0]
+ will always be 1.
+ */
+extern void
+ u8_grapheme_breaks (const uint8_t *s, size_t n, char *p);
+extern void
+ u16_grapheme_breaks (const uint16_t *s, size_t n, char *p);
+extern void
+ u32_grapheme_breaks (const uint32_t *s, size_t n, char *p);
+extern void
+ ulc_grapheme_breaks (const char *s, size_t n, char *p);
+
/* ========================================================================= */
#ifdef __cplusplus
diff --git a/lib/unigbrk/u16-grapheme-breaks.c b/lib/unigbrk/u16-grapheme-breaks.c
new file mode 100644
index 0000000000..38e01c29ef
--- /dev/null
+++ b/lib/unigbrk/u16-grapheme-breaks.c
@@ -0,0 +1,45 @@
+/* Grapheme cluster breaks function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Record the grapheme cluster boundaries of the N UTF-16 units at S in
+   P[0..N-1].  The entry for the first unit of each character receives the
+   result of uc_is_grapheme_cluster_break against the preceding character;
+   the entry for a second unit of the same character is set to 0.  */
+void
+u16_grapheme_breaks (const uint16_t *s, size_t n, char *p)
+{
+  ucs4_t prev;
+  int mblen;
+
+  /* The first character is compared against U+0000.  */
+  prev = 0;
+  for (; n > 0; s += mblen, p += mblen, n -= mblen)
+    {
+      ucs4_t next;
+
+      mblen = u16_mbtouc (&next, s, n);
+
+      p[0] = uc_is_grapheme_cluster_break (prev, next);
+      /* A character that spans more than one unit: its second unit lies
+         inside the character, so no cluster can start there.  */
+      if (mblen > 1)
+        p[1] = 0;
+
+      prev = next;
+    }
+}
diff --git a/lib/unigbrk/u16-grapheme-len.c b/lib/unigbrk/u16-grapheme-len.c
new file mode 100644
index 0000000000..6960227de3
--- /dev/null
+++ b/lib/unigbrk/u16-grapheme-len.c
@@ -0,0 +1,47 @@
+/* Grapheme cluster length function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the number of UTF-16 units occupied by the first grapheme cluster
+   among the N units at S.  Returns 0 only when N is 0.  */
+size_t
+u16_grapheme_len (const uint16_t *s, size_t n)
+{
+  ucs4_t last;
+  size_t len;
+
+  if (n == 0)
+    return 0;
+
+  /* Consume the first character unconditionally, then keep extending the
+     cluster until a break is found or the input is exhausted.  */
+  len = u16_mbtouc (&last, s, n);
+  while (len < n)
+    {
+      ucs4_t cur;
+      int count = u16_mbtouc (&cur, s + len, n - len);
+
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return len;
+
+      last = cur;
+      len += count;
+    }
+
+  return len;
+}
diff --git a/lib/unigbrk/u16-grapheme-next.c b/lib/unigbrk/u16-grapheme-next.c
new file mode 100644
index 0000000000..49bee8b1ae
--- /dev/null
+++ b/lib/unigbrk/u16-grapheme-next.c
@@ -0,0 +1,46 @@
+/* Next grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return a pointer to the first unit after the grapheme cluster that starts
+   at S, never going beyond END.  Returns NULL if S == END.  */
+const uint16_t *
+u16_grapheme_next (const uint16_t *s, const uint16_t *end)
+{
+  ucs4_t last;
+
+  if (s == end)
+    return NULL;
+
+  /* Step over the first character, then over every following character
+     that does not begin a new cluster.  */
+  s += u16_mbtouc (&last, s, end - s);
+  while (s != end)
+    {
+      ucs4_t cur;
+      int count = u16_mbtouc (&cur, s, end - s);
+
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return s;
+
+      last = cur;
+      s += count;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/u16-grapheme-prev.c b/lib/unigbrk/u16-grapheme-prev.c
new file mode 100644
index 0000000000..08e74d887c
--- /dev/null
+++ b/lib/unigbrk/u16-grapheme-prev.c
@@ -0,0 +1,55 @@
+/* Previous grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the start of the grapheme cluster that ends just before S, looking
+   back no further than START.  Returns NULL if S == START, and START if
+   ill-formed UTF-16 is encountered while scanning backward.  */
+const uint16_t *
+u16_grapheme_prev (const uint16_t *s, const uint16_t *start)
+{
+  ucs4_t next;
+
+  if (s == start)
+    return NULL;
+
+  s = u16_prev (&next, s, start);
+  if (s == NULL)
+    {
+      /* Ill-formed UTF-16 encoding.  Handle it here exactly as inside the
+         loop; otherwise the loop would step backward from a null pointer
+         and compare against a NEXT that may never have been stored.  */
+      return start;
+    }
+
+  while (s != start)
+    {
+      const uint16_t *prev_s;
+      ucs4_t prev;
+
+      prev_s = u16_prev (&prev, s, start);
+      if (prev_s == NULL)
+        {
+          /* Ill-formed UTF-16 encoding. */
+          return start;
+        }
+
+      /* S is a cluster start as soon as there is a break between the
+         character before it and the character at it.  */
+      if (uc_is_grapheme_cluster_break (prev, next))
+        break;
+
+      s = prev_s;
+      next = prev;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/u32-grapheme-breaks.c b/lib/unigbrk/u32-grapheme-breaks.c
new file mode 100644
index 0000000000..358fd067b1
--- /dev/null
+++ b/lib/unigbrk/u32-grapheme-breaks.c
@@ -0,0 +1,42 @@
+/* Grapheme cluster breaks function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Record the grapheme cluster boundaries of the N UTF-32 units at S in
+   P[0..N-1]; each entry receives the result of
+   uc_is_grapheme_cluster_break against the preceding character.  */
+void
+u32_grapheme_breaks (const uint32_t *s, size_t n, char *p)
+{
+  ucs4_t last = 0;   /* The first character is compared against U+0000.  */
+
+  for (; n > 0; n--, s++, p++)
+    {
+      ucs4_t cur;
+
+      /* Decode exactly one unit.  */
+      u32_mbtouc (&cur, s, 1);
+
+      *p = uc_is_grapheme_cluster_break (last, cur);
+
+      last = cur;
+    }
+}
diff --git a/lib/unigbrk/u32-grapheme-len.c b/lib/unigbrk/u32-grapheme-len.c
new file mode 100644
index 0000000000..4a6f5e77e8
--- /dev/null
+++ b/lib/unigbrk/u32-grapheme-len.c
@@ -0,0 +1,47 @@
+/* Grapheme cluster length function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the number of UTF-32 units occupied by the first grapheme cluster
+   among the N units at S.  Returns 0 only when N is 0.  */
+size_t
+u32_grapheme_len (const uint32_t *s, size_t n)
+{
+  ucs4_t last;
+  size_t len;
+
+  if (n == 0)
+    return 0;
+
+  /* The first unit always belongs to the cluster; extend it one unit at a
+     time until a break is found or the input is exhausted.  */
+  u32_mbtouc (&last, s, n);
+  len = 1;
+  while (len < n)
+    {
+      ucs4_t cur;
+
+      u32_mbtouc (&cur, s + len, n - len);
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return len;
+
+      last = cur;
+      len++;
+    }
+
+  return len;
+}
diff --git a/lib/unigbrk/u32-grapheme-next.c b/lib/unigbrk/u32-grapheme-next.c
new file mode 100644
index 0000000000..d9b6ce40f6
--- /dev/null
+++ b/lib/unigbrk/u32-grapheme-next.c
@@ -0,0 +1,46 @@
+/* Next grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return a pointer to the first unit after the grapheme cluster that starts
+   at S, never going beyond END.  Returns NULL if S == END.  */
+const uint32_t *
+u32_grapheme_next (const uint32_t *s, const uint32_t *end)
+{
+  ucs4_t last;
+
+  if (s == end)
+    return NULL;
+
+  /* Step over the first unit, then over every following unit that does not
+     begin a new cluster.  */
+  u32_mbtouc (&last, s, end - s);
+  s++;
+  while (s != end)
+    {
+      ucs4_t cur;
+
+      u32_mbtouc (&cur, s, end - s);
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return s;
+
+      last = cur;
+      s++;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/u32-grapheme-prev.c b/lib/unigbrk/u32-grapheme-prev.c
new file mode 100644
index 0000000000..587ab4e222
--- /dev/null
+++ b/lib/unigbrk/u32-grapheme-prev.c
@@ -0,0 +1,51 @@
+/* Previous grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the start of the grapheme cluster that ends just before S, looking
+   back no further than START.  Returns NULL if S == START, and START if an
+   ill-formed UTF-32 unit is encountered while scanning backward.  */
+const uint32_t *
+u32_grapheme_prev (const uint32_t *s, const uint32_t *start)
+{
+  ucs4_t next;
+
+  if (s == start)
+    return NULL;
+
+  if (u32_prev (&next, s, start) == NULL)
+    {
+      /* Ill-formed UTF-32 encoding.  Handle it here exactly as inside the
+         loop; otherwise NEXT may be compared below without ever having
+         been stored.  */
+      return start;
+    }
+
+  for (s--; s != start; s--)
+    {
+      ucs4_t prev;
+
+      if (u32_prev (&prev, s, start) == NULL)
+        {
+          /* Ill-formed UTF-32 encoding. */
+          return start;
+        }
+
+      /* S is a cluster start as soon as there is a break between the
+         character before it and the character at it.  */
+      if (uc_is_grapheme_cluster_break (prev, next))
+        break;
+
+      next = prev;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/u8-grapheme-breaks.c b/lib/unigbrk/u8-grapheme-breaks.c
new file mode 100644
index 0000000000..811d95ea91
--- /dev/null
+++ b/lib/unigbrk/u8-grapheme-breaks.c
@@ -0,0 +1,46 @@
+/* Grapheme cluster breaks function.
+ Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010,
+ based on code written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Record the grapheme cluster boundaries of the N UTF-8 units at S in
+   P[0..N-1].  The entry for the first unit of each character receives the
+   result of uc_is_grapheme_cluster_break against the preceding character;
+   the entries for the remaining units of that character are set to 0.  */
+void
+u8_grapheme_breaks (const uint8_t *s, size_t n, char *p)
+{
+  ucs4_t last = 0;   /* The first character is compared against U+0000.  */
+
+  while (n > 0)
+    {
+      ucs4_t cur;
+      int count = u8_mbtouc (&cur, s, n);
+      int k;
+
+      *p = uc_is_grapheme_cluster_break (last, cur);
+      /* Units after the first one lie inside the character.  */
+      for (k = 1; k < count; k++)
+        p[k] = 0;
+
+      last = cur;
+      s += count;
+      p += count;
+      n -= count;
+    }
+}
diff --git a/lib/unigbrk/u8-grapheme-len.c b/lib/unigbrk/u8-grapheme-len.c
new file mode 100644
index 0000000000..9ef3938d27
--- /dev/null
+++ b/lib/unigbrk/u8-grapheme-len.c
@@ -0,0 +1,47 @@
+/* Grapheme cluster length function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the number of UTF-8 units occupied by the first grapheme cluster
+   among the N units at S.  Returns 0 only when N is 0.  */
+size_t
+u8_grapheme_len (const uint8_t *s, size_t n)
+{
+  ucs4_t last;
+  size_t len;
+
+  if (n == 0)
+    return 0;
+
+  /* Consume the first character unconditionally, then keep extending the
+     cluster until a break is found or the input is exhausted.  */
+  len = u8_mbtouc (&last, s, n);
+  while (len < n)
+    {
+      ucs4_t cur;
+      int count = u8_mbtouc (&cur, s + len, n - len);
+
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return len;
+
+      last = cur;
+      len += count;
+    }
+
+  return len;
+}
diff --git a/lib/unigbrk/u8-grapheme-next.c b/lib/unigbrk/u8-grapheme-next.c
new file mode 100644
index 0000000000..8511a6e6f6
--- /dev/null
+++ b/lib/unigbrk/u8-grapheme-next.c
@@ -0,0 +1,46 @@
+/* Next grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return a pointer to the first unit after the grapheme cluster that starts
+   at S, never going beyond END.  Returns NULL if S == END.  */
+const uint8_t *
+u8_grapheme_next (const uint8_t *s, const uint8_t *end)
+{
+  ucs4_t last;
+
+  if (s == end)
+    return NULL;
+
+  /* Step over the first character, then over every following character
+     that does not begin a new cluster.  */
+  s += u8_mbtouc (&last, s, end - s);
+  while (s != end)
+    {
+      ucs4_t cur;
+      int count = u8_mbtouc (&cur, s, end - s);
+
+      if (uc_is_grapheme_cluster_break (last, cur))
+        return s;
+
+      last = cur;
+      s += count;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/u8-grapheme-prev.c b/lib/unigbrk/u8-grapheme-prev.c
new file mode 100644
index 0000000000..39943fdc60
--- /dev/null
+++ b/lib/unigbrk/u8-grapheme-prev.c
@@ -0,0 +1,55 @@
+/* Previous grapheme cluster function.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include "unistr.h"
+
+/* Return the start of the grapheme cluster that ends just before S, looking
+   back no further than START.  Returns NULL if S == START, and START if
+   ill-formed UTF-8 is encountered while scanning backward.  */
+const uint8_t *
+u8_grapheme_prev (const uint8_t *s, const uint8_t *start)
+{
+  ucs4_t next;
+
+  if (s == start)
+    return NULL;
+
+  s = u8_prev (&next, s, start);
+  if (s == NULL)
+    {
+      /* Ill-formed UTF-8 encoding.  Handle it here exactly as inside the
+         loop; otherwise the loop would step backward from a null pointer
+         and compare against a NEXT that may never have been stored.  */
+      return start;
+    }
+
+  while (s != start)
+    {
+      const uint8_t *prev_s;
+      ucs4_t prev;
+
+      prev_s = u8_prev (&prev, s, start);
+      if (prev_s == NULL)
+        {
+          /* Ill-formed UTF-8 encoding. */
+          return start;
+        }
+
+      /* S is a cluster start as soon as there is a break between the
+         character before it and the character at it.  */
+      if (uc_is_grapheme_cluster_break (prev, next))
+        break;
+
+      s = prev_s;
+      next = prev;
+    }
+
+  return s;
+}
diff --git a/lib/unigbrk/ulc-grapheme-breaks.c b/lib/unigbrk/ulc-grapheme-breaks.c
new file mode 100644
index 0000000000..d774902056
--- /dev/null
+++ b/lib/unigbrk/ulc-grapheme-breaks.c
@@ -0,0 +1,130 @@
+/* Grapheme cluster breaks function.
+ Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@cs.stanford.edu>, 2010,
+ based on code written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "unigbrk.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "c-ctype.h"
+#include "c-strcaseeq.h"
+#include "localcharset.h"
+#include "uniconv.h"
+
+/* Return 1 if STRCASEEQ reports that ENCODING matches "UTF-8", else 0.  */
+static int
+is_utf8_encoding (const char *encoding)
+{
+  return (STRCASEEQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)
+          ? 1
+          : 0);
+}
+
+#if C_CTYPE_ASCII
+/* Fallback used when the string cannot be converted to Unicode.  Writes
+   P[0..N-1] for the N bytes at S; N must be at least 1 because P[0] is
+   stored unconditionally.  A byte that c_isprint or c_isspace accepts starts
+   a new grapheme, except a '\n' directly after a '\r' (CR-LF is a single
+   grapheme).  Every other byte is marked as continuing the previous one.  */
+static void
+ascii_grapheme_breaks (const char *s, size_t n, char *p)
+{
+  size_t i;
+
+  p[0] = 1;
+  for (i = 1; i < n; i++)
+    {
+      char c = s[i];
+
+      if (!c_isprint (c) && !c_isspace (c))
+        p[i] = 0;
+      else if (c == '\n' && s[i - 1] == '\r')
+        p[i] = 0;
+      else
+        p[i] = 1;
+    }
+}
+#endif
+
+/* Grapheme boundaries in a string in an arbitrary encoding.
+
+ We convert the input string to Unicode.
+
+ The standardized Unicode encodings are UTF-8, UCS-2, UCS-4, UTF-16,
+ UTF-16BE, UTF-16LE, UTF-7. UCS-2 supports only characters up to
+ \U0000FFFF. UTF-16 and variants support only characters up to
+ \U0010FFFF. UTF-7 is way too complex and not supported by glibc-2.1.
+ UCS-4 specification leaves doubts about endianness and byte order mark.
+ glibc currently interprets it as big endian without byte order mark,
+ but this is not backed by an RFC. So we use UTF-8. It supports
+ characters up to \U7FFFFFFF and is unambiguously defined.
+
+ S holds N bytes in the encoding reported by locale_charset (); the result
+ is stored in P[0..N-1]. Nothing is written when N is 0. If the locale
+ encoding is already UTF-8, the work is delegated directly to
+ u8_grapheme_breaks. Otherwise S is converted to UTF-8, the breaks are
+ computed on the converted string, and they are mapped back through the
+ offsets table. If an allocation or the conversion fails, a coarse fallback
+ result is stored instead (see the end of the function). */
+
+void
+ulc_grapheme_breaks (const char *s, size_t n, char *p)
+{
+ if (n > 0)
+ {
+ const char *encoding = locale_charset ();
+
+ if (is_utf8_encoding (encoding))
+ u8_grapheme_breaks ((const uint8_t *) s, n, p);
+ else
+ {
+ /* Convert the string to UTF-8 and build a translation table
+ from offsets into s to offsets into the translated string.
+ The table has one entry per input byte. */
+ size_t *offsets = (size_t *) malloc (n * sizeof (size_t));
+
+ if (offsets != NULL)
+ {
+ uint8_t *t;
+ size_t m;
+
+ t = u8_conv_from_encoding (encoding, iconveh_question_mark,
+ s, n, offsets, NULL, &m);
+ if (t != NULL)
+ {
+ char *q = (char *) (m > 0 ? malloc (m) : NULL); /* NOTE(review): q
+ is NULL when m == 0, yet the loop below reads q[offsets[i]] for
+ every offset that is not (size_t)(-1); confirm that a non-empty
+ input can never convert to an empty UTF-8 string. */
+
+ if (m == 0 || q != NULL)
+ {
+ size_t i;
+
+ /* Determine the grapheme breaks of the UTF-8 string. */
+ u8_grapheme_breaks (t, m, q);
+
+ /* Translate the result back to the original string. Bytes
+ whose offset entry is (size_t)(-1) keep the 0 stored by
+ the memset. */
+ memset (p, 0, n);
+ for (i = 0; i < n; i++)
+ if (offsets[i] != (size_t)(-1))
+ p[i] = q[offsets[i]];
+
+ free (q);
+ free (t);
+ free (offsets);
+ return;
+ }
+ free (t);
+ }
+ free (offsets);
+ }
+
+ /* Impossible to convert: an allocation or the conversion itself
+ failed. */
+#if C_CTYPE_ASCII
+ /* Fall back to ASCII as best we can. */
+ ascii_grapheme_breaks (s, n, p);
+#else
+ /* We cannot make any assumptions. Report the whole string as a
+ single grapheme cluster. */
+ p[0] = 1;
+ memset (p + 1, 0, n - 1);
+#endif
+ }
+ }
+}
diff --git a/m4/locale-ar.m4 b/m4/locale-ar.m4
new file mode 100644
index 0000000000..beb8ab387f
--- /dev/null
+++ b/m4/locale-ar.m4
@@ -0,0 +1,69 @@
+# locale-ar.m4 serial 1
+dnl Copyright (C) 2003, 2005-2010 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl From Ben Pfaff, based on locale-fr.m4 by Bruno Haible.
+
+dnl Determine the name of an Arabic locale with traditional encoding: LOCALE_AR.
+AC_DEFUN([gt_LOCALE_AR],
+[
+ AC_REQUIRE([AC_CANONICAL_HOST])
+ AC_REQUIRE([AM_LANGINFO_CODESET])
+ AC_CACHE_CHECK([for a traditional Arabic locale], [gt_cv_locale_ar], [
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+struct tm t;
+char buf[16];
+int main () {
+ /* Check whether the given locale name is recognized by the system. */
+ if (setlocale (LC_ALL, "") == NULL) return 1;
+ /* Check that nl_langinfo(CODESET) is nonempty and not "ASCII", "646" or "UTF-8". */
+#if HAVE_LANGINFO_CODESET
+ {
+ const char *cs = nl_langinfo (CODESET);
+ if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0
+ || strcmp (cs, "UTF-8") == 0)
+ return 1;
+ }
+#endif
+#ifdef __CYGWIN__
+ /* On Cygwin, avoid locale names without encoding suffix, because the
+ locale_charset() function relies on the encoding suffix. Note that
+ LC_ALL is set on the command line. */
+ if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
+#endif
+ /* Check that the name of the first month begins with U+0643 (ك) as encoded
+ by ISO 8859-6. This excludes the UTF-8 encoding. */
+ t.tm_year = 1975 - 1900; t.tm_mon = 1 - 1; t.tm_mday = 4;
+ strftime (buf, sizeof (buf), "%B", &t);
+ if ((unsigned char) buf[0] != 0xe3) return 1;
+ return 0;
+}
+changequote([,])dnl
+ ])])
+ if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+ # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+ # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+ # configure script would override the LC_ALL setting. Likewise for
+ # LC_CTYPE, which is also set at the beginning of the configure script.
+ # Test for the usual locale name.
+ for gt_cv_locale_ar in ar_SA ar_SA.ISO-8859-6 ar_EG ar_EG.ISO-8859-6 none; do
+ if test $gt_cv_locale_ar = none || (LC_ALL=$gt_cv_locale_ar LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+ break
+ fi
+ done
+ fi
+ rm -fr conftest*
+ ])
+ LOCALE_AR=$gt_cv_locale_ar
+ AC_SUBST([LOCALE_AR])
+])
diff --git a/modules/unigbrk/u16-grapheme-breaks b/modules/unigbrk/u16-grapheme-breaks
new file mode 100644
index 0000000000..61fd4526d1
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-breaks
@@ -0,0 +1,28 @@
+Description:
+Find grapheme cluster breaks in UTF-16 string.
+
+Files:
+lib/unigbrk/u16-grapheme-breaks.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u16-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u16-grapheme-breaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-breaks])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_BREAKS
+lib_SOURCES += unigbrk/u16-grapheme-breaks.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u16-grapheme-breaks-tests b/modules/unigbrk/u16-grapheme-breaks-tests
new file mode 100644
index 0000000000..9f7f2dffef
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-breaks-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u16-grapheme-breaks.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u16-grapheme-breaks
+check_PROGRAMS += test-u16-grapheme-breaks
+test_u16_grapheme_breaks_SOURCES = unigbrk/test-u16-grapheme-breaks.c
+test_u16_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u16-grapheme-len b/modules/unigbrk/u16-grapheme-len
new file mode 100644
index 0000000000..06d064603c
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-len
@@ -0,0 +1,28 @@
+Description:
+Length of first grapheme cluster in UTF-16 string.
+
+Files:
+lib/unigbrk/u16-grapheme-len.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u16-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u16-grapheme-len])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-len])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_LEN
+lib_SOURCES += unigbrk/u16-grapheme-len.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u16-grapheme-len-tests b/modules/unigbrk/u16-grapheme-len-tests
new file mode 100644
index 0000000000..66f7aac7d5
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-len-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u16-grapheme-len.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u16-grapheme-len
+check_PROGRAMS += test-u16-grapheme-len
+test_u16_grapheme_len_SOURCES = unigbrk/test-u16-grapheme-len.c
+test_u16_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u16-grapheme-next b/modules/unigbrk/u16-grapheme-next
new file mode 100644
index 0000000000..3d3a9c7f22
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-next
@@ -0,0 +1,28 @@
+Description:
+Find start of next grapheme cluster in UTF-16 string.
+
+Files:
+lib/unigbrk/u16-grapheme-next.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u16-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u16-grapheme-next])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-next])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_NEXT
+lib_SOURCES += unigbrk/u16-grapheme-next.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u16-grapheme-next-tests b/modules/unigbrk/u16-grapheme-next-tests
new file mode 100644
index 0000000000..a947afe3f3
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-next-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u16-grapheme-next.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u16-grapheme-next
+check_PROGRAMS += test-u16-grapheme-next
+test_u16_grapheme_next_SOURCES = unigbrk/test-u16-grapheme-next.c
+test_u16_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u16-grapheme-prev b/modules/unigbrk/u16-grapheme-prev
new file mode 100644
index 0000000000..a58219e93f
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-prev
@@ -0,0 +1,28 @@
+Description:
+Find start of previous grapheme cluster in UTF-16 string.
+
+Files:
+lib/unigbrk/u16-grapheme-prev.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u16-prev
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u16-grapheme-prev])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u16-grapheme-prev])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U16_GRAPHEME_PREV
+lib_SOURCES += unigbrk/u16-grapheme-prev.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u16-grapheme-prev-tests b/modules/unigbrk/u16-grapheme-prev-tests
new file mode 100644
index 0000000000..be62d2468a
--- /dev/null
+++ b/modules/unigbrk/u16-grapheme-prev-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u16-grapheme-prev.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u16-grapheme-prev
+check_PROGRAMS += test-u16-grapheme-prev
+test_u16_grapheme_prev_SOURCES = unigbrk/test-u16-grapheme-prev.c
+test_u16_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u32-grapheme-breaks b/modules/unigbrk/u32-grapheme-breaks
new file mode 100644
index 0000000000..5ae699822c
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-breaks
@@ -0,0 +1,28 @@
+Description:
+Find grapheme cluster breaks in UTF-32 string.
+
+Files:
+lib/unigbrk/u32-grapheme-breaks.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u32-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u32-grapheme-breaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-breaks])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_BREAKS
+lib_SOURCES += unigbrk/u32-grapheme-breaks.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u32-grapheme-breaks-tests b/modules/unigbrk/u32-grapheme-breaks-tests
new file mode 100644
index 0000000000..4561344744
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-breaks-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u32-grapheme-breaks.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u32-grapheme-breaks
+check_PROGRAMS += test-u32-grapheme-breaks
+test_u32_grapheme_breaks_SOURCES = unigbrk/test-u32-grapheme-breaks.c
+test_u32_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u32-grapheme-len b/modules/unigbrk/u32-grapheme-len
new file mode 100644
index 0000000000..4129fb1b2f
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-len
@@ -0,0 +1,28 @@
+Description:
+Length of first grapheme cluster in UTF-32 string.
+
+Files:
+lib/unigbrk/u32-grapheme-len.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u32-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u32-grapheme-len])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-len])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_LEN
+lib_SOURCES += unigbrk/u32-grapheme-len.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u32-grapheme-len-tests b/modules/unigbrk/u32-grapheme-len-tests
new file mode 100644
index 0000000000..1618abda89
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-len-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u32-grapheme-len.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u32-grapheme-len
+check_PROGRAMS += test-u32-grapheme-len
+test_u32_grapheme_len_SOURCES = unigbrk/test-u32-grapheme-len.c
+test_u32_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u32-grapheme-next b/modules/unigbrk/u32-grapheme-next
new file mode 100644
index 0000000000..d2fab5a47b
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-next
@@ -0,0 +1,28 @@
+Description:
+Find start of next grapheme cluster in UTF-32 string.
+
+Files:
+lib/unigbrk/u32-grapheme-next.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u32-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u32-grapheme-next])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-next])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_NEXT
+lib_SOURCES += unigbrk/u32-grapheme-next.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u32-grapheme-next-tests b/modules/unigbrk/u32-grapheme-next-tests
new file mode 100644
index 0000000000..4ed8b1f94e
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-next-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u32-grapheme-next.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u32-grapheme-next
+check_PROGRAMS += test-u32-grapheme-next
+test_u32_grapheme_next_SOURCES = unigbrk/test-u32-grapheme-next.c
+test_u32_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u32-grapheme-prev b/modules/unigbrk/u32-grapheme-prev
new file mode 100644
index 0000000000..c1a6375908
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-prev
@@ -0,0 +1,28 @@
+Description:
+Find start of previous grapheme cluster in UTF-32 string.
+
+Files:
+lib/unigbrk/u32-grapheme-prev.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u32-prev
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u32-grapheme-prev])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u32-grapheme-prev])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U32_GRAPHEME_PREV
+lib_SOURCES += unigbrk/u32-grapheme-prev.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u32-grapheme-prev-tests b/modules/unigbrk/u32-grapheme-prev-tests
new file mode 100644
index 0000000000..0900722791
--- /dev/null
+++ b/modules/unigbrk/u32-grapheme-prev-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u32-grapheme-prev.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u32-grapheme-prev
+check_PROGRAMS += test-u32-grapheme-prev
+test_u32_grapheme_prev_SOURCES = unigbrk/test-u32-grapheme-prev.c
+test_u32_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u8-grapheme-breaks b/modules/unigbrk/u8-grapheme-breaks
new file mode 100644
index 0000000000..6d0e98958a
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-breaks
@@ -0,0 +1,28 @@
+Description:
+Find grapheme cluster breaks in UTF-8 string.
+
+Files:
+lib/unigbrk/u8-grapheme-breaks.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u8-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u8-grapheme-breaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-breaks])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_BREAKS
+lib_SOURCES += unigbrk/u8-grapheme-breaks.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u8-grapheme-breaks-tests b/modules/unigbrk/u8-grapheme-breaks-tests
new file mode 100644
index 0000000000..7d1f6d98d5
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-breaks-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u8-grapheme-breaks.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u8-grapheme-breaks
+check_PROGRAMS += test-u8-grapheme-breaks
+test_u8_grapheme_breaks_SOURCES = unigbrk/test-u8-grapheme-breaks.c
+test_u8_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u8-grapheme-len b/modules/unigbrk/u8-grapheme-len
new file mode 100644
index 0000000000..04e6c40efc
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-len
@@ -0,0 +1,28 @@
+Description:
+Length of first grapheme cluster in UTF-8 string.
+
+Files:
+lib/unigbrk/u8-grapheme-len.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u8-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u8-grapheme-len])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-len])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_LEN
+lib_SOURCES += unigbrk/u8-grapheme-len.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u8-grapheme-len-tests b/modules/unigbrk/u8-grapheme-len-tests
new file mode 100644
index 0000000000..4067abfe33
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-len-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u8-grapheme-len.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u8-grapheme-len
+check_PROGRAMS += test-u8-grapheme-len
+test_u8_grapheme_len_SOURCES = unigbrk/test-u8-grapheme-len.c
+test_u8_grapheme_len_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u8-grapheme-next b/modules/unigbrk/u8-grapheme-next
new file mode 100644
index 0000000000..355757f3b2
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-next
@@ -0,0 +1,28 @@
+Description:
+Find start of next grapheme cluster in UTF-8 string.
+
+Files:
+lib/unigbrk/u8-grapheme-next.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u8-mbtouc
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u8-grapheme-next])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-next])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_NEXT
+lib_SOURCES += unigbrk/u8-grapheme-next.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u8-grapheme-next-tests b/modules/unigbrk/u8-grapheme-next-tests
new file mode 100644
index 0000000000..d30709ea63
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-next-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u8-grapheme-next.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u8-grapheme-next
+check_PROGRAMS += test-u8-grapheme-next
+test_u8_grapheme_next_SOURCES = unigbrk/test-u8-grapheme-next.c
+test_u8_grapheme_next_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/u8-grapheme-prev b/modules/unigbrk/u8-grapheme-prev
new file mode 100644
index 0000000000..182dbb0fee
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-prev
@@ -0,0 +1,28 @@
+Description:
+Find start of previous grapheme cluster in UTF-8 string.
+
+Files:
+lib/unigbrk/u8-grapheme-prev.c
+tests/macros.h
+
+Depends-on:
+unigbrk/uc-is-grapheme-break
+unistr/u8-prev
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/u8-grapheme-prev])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/u8-grapheme-prev])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_U8_GRAPHEME_PREV
+lib_SOURCES += unigbrk/u8-grapheme-prev.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/u8-grapheme-prev-tests b/modules/unigbrk/u8-grapheme-prev-tests
new file mode 100644
index 0000000000..137e8464f7
--- /dev/null
+++ b/modules/unigbrk/u8-grapheme-prev-tests
@@ -0,0 +1,12 @@
+Files:
+tests/unigbrk/test-u8-grapheme-prev.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u8-grapheme-prev
+check_PROGRAMS += test-u8-grapheme-prev
+test_u8_grapheme_prev_SOURCES = unigbrk/test-u8-grapheme-prev.c
+test_u8_grapheme_prev_LDADD = $(LDADD) $(LIBUNISTRING)
diff --git a/modules/unigbrk/ulc-grapheme-breaks b/modules/unigbrk/ulc-grapheme-breaks
new file mode 100644
index 0000000000..e13e7fde8a
--- /dev/null
+++ b/modules/unigbrk/ulc-grapheme-breaks
@@ -0,0 +1,30 @@
+Description:
+Grapheme cluster breaks in locale-encoded string.
+
+Files:
+lib/unigbrk/ulc-grapheme-breaks.c
+
+Depends-on:
+unigbrk/base
+unigbrk/u8-grapheme-breaks
+uniconv/u8-conv-from-enc
+c-ctype
+localcharset
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/ulc-grapheme-breaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/ulc-grapheme-breaks])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_ULC_GRAPHEME_BREAKS
+lib_SOURCES += unigbrk/ulc-grapheme-breaks.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPL
+
+Maintainer:
+Ben Pfaff, Bruno Haible
diff --git a/modules/unigbrk/ulc-grapheme-breaks-tests b/modules/unigbrk/ulc-grapheme-breaks-tests
new file mode 100644
index 0000000000..60cd513f7f
--- /dev/null
+++ b/modules/unigbrk/ulc-grapheme-breaks-tests
@@ -0,0 +1,18 @@
+Files:
+tests/unigbrk/test-ulc-grapheme-breaks.sh
+tests/unigbrk/test-ulc-grapheme-breaks.c
+tests/macros.h
+m4/locale-ar.m4
+m4/codeset.m4
+
+Depends-on:
+
+configure.ac:
+gt_LOCALE_AR
+
+Makefile.am:
+TESTS += unigbrk/test-ulc-grapheme-breaks.sh
+TESTS_ENVIRONMENT += LOCALE_AR='@LOCALE_AR@'
+check_PROGRAMS += test-ulc-grapheme-breaks
+test_ulc_grapheme_breaks_SOURCES = unigbrk/test-ulc-grapheme-breaks.c
+test_ulc_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING) @LIBICONV@
diff --git a/tests/unigbrk/test-u16-grapheme-breaks.c b/tests/unigbrk/test-u16-grapheme-breaks.c
new file mode 100644
index 0000000000..98cd7631ae
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-breaks.c
@@ -0,0 +1,105 @@
+/* Grapheme cluster breaks test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+/* Check u16_grapheme_breaks on the given units against EXPECTED; abort on mismatch. */
+static void
+test_u16_grapheme_breaks (const char *expected, ...)
+{
+ size_t n = strlen (expected); /* One char of EXPECTED per unit. */
+ uint16_t s[16];
+ va_list args;
+ char breaks[16];
+ size_t i;
+
+ ASSERT (n <= 16); /* Capacity of S and BREAKS. */
+
+ memset (breaks, 0xcc, n); /* Neither 0 nor 1: unset entries mismatch below. */
+
+ va_start (args, expected);
+ for (i = 0; i < n; i++)
+ {
+ int unit = va_arg (args, int);
+ ASSERT (unit >= 0); /* The -1 terminator must not come early. */
+ s[i] = unit;
+ }
+ ASSERT (va_arg (args, int) == -1); /* The list ends right after N units. */
+ va_end (args);
+
+ u16_grapheme_breaks (s, n, breaks);
+ for (i = 0; i < n; i++)
+ if (breaks[i] != (expected[i] == '#')) /* '#' expects 1, anything else 0. */
+ {
+ size_t j;
+
+ fprintf (stderr, "wrong grapheme breaks:\n");
+
+ fprintf (stderr, " input:");
+ for (j = 0; j < n; j++)
+ fprintf (stderr, " %02x", s[j]);
+ putc ('\n', stderr);
+
+ fprintf (stderr, "expected:");
+ for (j = 0; j < n; j++)
+ fprintf (stderr, " %d", expected[j] == '#');
+ putc ('\n', stderr);
+
+ fprintf (stderr, " actual:");
+ for (j = 0; j < n; j++)
+ fprintf (stderr, " %d", breaks[j]);
+ putc ('\n', stderr);
+
+ abort ();
+ }
+}
+/* Run u16_grapheme_breaks on fixed inputs; each call aborts on a wrong result. */
+int
+main (void)
+{
+ static const char s[] = "abc"; /* NOTE(review): not referenced in this function. */
+
+ /* Standalone 1-unit graphemes. */
+ test_u16_grapheme_breaks ("#", 'a', -1);
+ test_u16_grapheme_breaks ("##", 'a', 'b', -1);
+ test_u16_grapheme_breaks ("###", 'a', 'b', 'c', -1);
+
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u16_grapheme_breaks ("#", HIRAGANA_A, -1);
+ test_u16_grapheme_breaks ("##", HIRAGANA_A, 'x', -1);
+ test_u16_grapheme_breaks ("##", HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u16_grapheme_breaks ("#_", 'e', ACUTE, -1);
+ test_u16_grapheme_breaks ("#__", 'e', ACUTE, GRAVE, -1);
+ test_u16_grapheme_breaks ("#_#", 'e', ACUTE, 'x', -1);
+ test_u16_grapheme_breaks ("#_#_", 'e', ACUTE, 'e', GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u16-grapheme-len.c b/tests/unigbrk/test-u16-grapheme-len.c
new file mode 100644
index 0000000000..cce9ea63ab
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-len.c
@@ -0,0 +1,95 @@
+/* Grapheme cluster length test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+/* Check that u16_grapheme_len returns LEN for the units given before -1. */
+static void
+test_u16_grapheme_len (size_t len, ...)
+{
+ uint16_t s[16];
+ size_t retval;
+ va_list args;
+ size_t n;
+
+ va_start (args, len);
+ n = 0;
+ for (;;)
+ {
+ int unit = va_arg (args, int);
+ if (unit == -1) /* End-of-list marker. */
+ break;
+ else if (n >= sizeof s / sizeof *s) /* Too many units for S. */
+ abort ();
+
+ s[n++] = unit;
+ }
+ va_end (args);
+
+ retval = u16_grapheme_len (s, n);
+ if (retval != len)
+ {
+ size_t i;
+
+ fprintf (stderr, "u16_grapheme_len counted %zu units, expected %zu:",
+ retval, len);
+ for (i = 0; i < n; i++)
+ fprintf (stderr, " %04x", s[i]);
+ putc ('\n', stderr);
+ abort ();
+ }
+}
+
+/* Run u16_grapheme_len on fixed inputs; each call aborts on a wrong result. */
+int
+main (void)
+{
+ /* Empty string. */
+ test_u16_grapheme_len (0, -1);
+
+ /* Standalone 1-unit graphemes. */
+ test_u16_grapheme_len (1, 'a', -1);
+ test_u16_grapheme_len (1, 'a', 'b', -1);
+ test_u16_grapheme_len (1, 'a', 'b', 'c', -1);
+
+ /* Non-ASCII graphemes of a single UTF-16 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u16_grapheme_len (1, HIRAGANA_A, -1);
+ test_u16_grapheme_len (1, HIRAGANA_A, 'x', -1);
+ test_u16_grapheme_len (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u16_grapheme_len (2, 'e', ACUTE, -1);
+ test_u16_grapheme_len (3, 'e', ACUTE, GRAVE, -1);
+ test_u16_grapheme_len (2, 'e', ACUTE, 'x', -1);
+ test_u16_grapheme_len (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Surrogate pairs. */
+ test_u16_grapheme_len (2, 0xd83d, 0xde10, -1); /* 😐: neutral face. */
+ test_u16_grapheme_len (3, 0xd83d, 0xde10, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u16-grapheme-next.c b/tests/unigbrk/test-u16-grapheme-next.c
new file mode 100644
index 0000000000..b69bad75ef
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-next.c
@@ -0,0 +1,102 @@
+/* Next grapheme cluster length test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "macros.h"
+/* Check that u16_grapheme_next skips LEN units of the string given before -1. */
+static void
+test_u16_grapheme_next (size_t len, ...)
+{
+ const uint16_t *next;
+ uint16_t s[16];
+ va_list args;
+ size_t n;
+
+ va_start (args, len);
+ n = 0;
+ for (;;)
+ {
+ int unit = va_arg (args, int);
+ if (unit == -1) /* End-of-list marker. */
+ break;
+ else if (n >= sizeof s / sizeof *s) /* Too many units for S. */
+ abort ();
+
+ s[n++] = unit;
+ }
+ va_end (args);
+
+ next = u16_grapheme_next (s, s + n);
+ if (next != s + len)
+ {
+ size_t i;
+
+ if (next == NULL)
+ fputs ("u16_grapheme_next returned NULL", stderr);
+ else /* %zu needs size_t; a pointer difference is ptrdiff_t. */
+ fprintf (stderr, "u16_grapheme_next skipped %zu units", (size_t) (next - s));
+ fprintf (stderr, ", expected %zu:\n", len);
+ for (i = 0; i < n; i++)
+ fprintf (stderr, " %04x", s[i]);
+ putc ('\n', stderr);
+ abort ();
+ }
+}
+/* Run u16_grapheme_next on fixed inputs; each check aborts on a wrong result. */
+int
+main (void)
+{
+ static const uint16_t s[] = { 'a', 'b', 'c' };
+
+ /* Empty string. */
+ ASSERT (u16_grapheme_next (NULL, NULL) == NULL);
+ ASSERT (u16_grapheme_next (s, s) == NULL);
+
+ /* Standalone 1-unit graphemes. */
+ test_u16_grapheme_next (1, 'a', -1);
+ test_u16_grapheme_next (1, 'a', 'b', -1);
+ test_u16_grapheme_next (1, 'a', 'b', 'c', -1);
+
+ /* Non-ASCII graphemes of a single UTF-16 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u16_grapheme_next (1, HIRAGANA_A, -1);
+ test_u16_grapheme_next (1, HIRAGANA_A, 'x', -1);
+ test_u16_grapheme_next (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u16_grapheme_next (2, 'e', ACUTE, -1);
+ test_u16_grapheme_next (3, 'e', ACUTE, GRAVE, -1);
+ test_u16_grapheme_next (2, 'e', ACUTE, 'x', -1);
+ test_u16_grapheme_next (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Surrogate pairs. */
+ test_u16_grapheme_next (2, 0xd83d, 0xde10, -1); /* 😐: neutral face. */
+ test_u16_grapheme_next (3, 0xd83d, 0xde10, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u16-grapheme-prev.c b/tests/unigbrk/test-u16-grapheme-prev.c
new file mode 100644
index 0000000000..8d7ec66681
--- /dev/null
+++ b/tests/unigbrk/test-u16-grapheme-prev.c
@@ -0,0 +1,104 @@
+/* Previous grapheme cluster test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "macros.h"
+/* Check that u16_grapheme_prev steps back LEN units from the end of the given string. */
+static void
+test_u16_grapheme_prev (size_t len, ...)
+{
+ const uint16_t *prev;
+ const uint16_t *end;
+ uint16_t s[16];
+ va_list args;
+ size_t n;
+
+ va_start (args, len);
+ n = 0;
+ for (;;)
+ {
+ int unit = va_arg (args, int);
+ if (unit == -1) /* End-of-list marker. */
+ break;
+ else if (n >= sizeof s / sizeof *s) /* Too many units for S. */
+ abort ();
+
+ s[n++] = unit;
+ }
+ va_end (args);
+
+ end = s + n;
+ prev = u16_grapheme_prev (end, s);
+ if (prev != end - len)
+ {
+ size_t i;
+
+ if (prev == NULL)
+ fputs ("u16_grapheme_prev returned NULL", stderr);
+ else /* %zu needs size_t; a pointer difference is ptrdiff_t. */
+ fprintf (stderr, "u16_grapheme_prev skipped %zu units", (size_t) (end - prev));
+ fprintf (stderr, ", expected %zu:\n", len);
+ for (i = 0; i < n; i++)
+ fprintf (stderr, " %04x", s[i]);
+ putc ('\n', stderr);
+ abort ();
+ }
+}
+/* Run u16_grapheme_prev on fixed inputs; each check aborts on a wrong result. */
+int
+main (void)
+{
+ static const uint16_t s[] = { 'a', 'b', 'c' };
+
+ /* Empty string. */
+ ASSERT (u16_grapheme_prev (NULL, NULL) == NULL);
+ ASSERT (u16_grapheme_prev (s, s) == NULL);
+
+ /* Standalone 1-unit graphemes. */
+ test_u16_grapheme_prev (1, 'a', -1);
+ test_u16_grapheme_prev (1, 'a', 'b', -1);
+ test_u16_grapheme_prev (1, 'a', 'b', 'c', -1);
+
+ /* Non-ASCII graphemes of a single UTF-16 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u16_grapheme_prev (1, HIRAGANA_A, -1);
+ test_u16_grapheme_prev (1, HIRAGANA_A, 'x', -1);
+ test_u16_grapheme_prev (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u16_grapheme_prev (2, 'e', ACUTE, -1);
+ test_u16_grapheme_prev (3, 'e', ACUTE, GRAVE, -1);
+ test_u16_grapheme_prev (1, 'e', ACUTE, 'x', -1);
+ test_u16_grapheme_prev (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Surrogate pairs. */
+ test_u16_grapheme_prev (2, 0xd83d, 0xde10, -1); /* 😐: neutral face. */
+ test_u16_grapheme_prev (3, 0xd83d, 0xde10, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u32-grapheme-breaks.c b/tests/unigbrk/test-u32-grapheme-breaks.c
new file mode 100644
index 0000000000..e4e91ff1f8
--- /dev/null
+++ b/tests/unigbrk/test-u32-grapheme-breaks.c
@@ -0,0 +1,105 @@
+/* Grapheme cluster breaks test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+
+/* Check u32_grapheme_breaks against an expected break map.
+
+   EXPECTED holds one character per UTF-32 unit: '#' where a grapheme
+   cluster break is expected at that unit, any other character (by
+   convention '_') where it is not.  The variable arguments are the
+   strlen (EXPECTED) units, each passed as a nonnegative int, followed
+   by a terminating -1.  On a mismatch the input, the expected map and
+   the actual map are written to stderr and the test aborts.  */
+static void
+test_u32_grapheme_breaks (const char *expected, ...)
+{
+  size_t n = strlen (expected);
+  uint32_t s[16];
+  va_list args;
+  char breaks[sizeof s / sizeof *s];
+  size_t i;
+
+  /* Both fixed-size buffers must be able to hold the whole input.  */
+  ASSERT (n <= sizeof s / sizeof *s);
+
+  /* Poison the output so that entries left unwritten show up as
+     mismatches (0xcc is neither 0 nor 1).  */
+  memset (breaks, 0xcc, n);
+
+  va_start (args, expected);
+  for (i = 0; i < n; i++)
+    {
+      int unit = va_arg (args, int);
+      ASSERT (unit >= 0);
+      s[i] = unit;
+    }
+  /* The caller must have passed exactly N units before the -1.  */
+  ASSERT (va_arg (args, int) == -1);
+  va_end (args);
+
+  u32_grapheme_breaks (s, n, breaks);
+  for (i = 0; i < n; i++)
+    if (breaks[i] != (expected[i] == '#'))
+      {
+        size_t j;
+
+        fprintf (stderr, "wrong grapheme breaks:\n");
+
+        fprintf (stderr, " input:");
+        for (j = 0; j < n; j++)
+          /* uint32_t need not be unsigned int; convert it to match %x.  */
+          fprintf (stderr, " %02x", (unsigned int) s[j]);
+        putc ('\n', stderr);
+
+        fprintf (stderr, "expected:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %d", expected[j] == '#');
+        putc ('\n', stderr);
+
+        fprintf (stderr, " actual:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %d", breaks[j]);
+        putc ('\n', stderr);
+
+        abort ();
+      }
+}
+
+/* Run the u32_grapheme_breaks test cases.  In each call the first
+   argument is the expected break map ('#' = break, '_' = no break), one
+   character per UTF-32 unit, followed by the units and a terminating
+   -1.  */
+int
+main (void)
+{
+  /* Standalone 1-unit graphemes. */
+  test_u32_grapheme_breaks ("#", 'a', -1);
+  test_u32_grapheme_breaks ("##", 'a', 'b', -1);
+  test_u32_grapheme_breaks ("###", 'a', 'b', 'c', -1);
+
+  /* Single code point graphemes outside ASCII. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+  test_u32_grapheme_breaks ("#", HIRAGANA_A, -1);
+  test_u32_grapheme_breaks ("##", HIRAGANA_A, 'x', -1);
+  test_u32_grapheme_breaks ("##", HIRAGANA_A, HIRAGANA_A, -1);
+
+  /* Combining accents: no break is expected before them. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+  test_u32_grapheme_breaks ("#_", 'e', ACUTE, -1);
+  test_u32_grapheme_breaks ("#__", 'e', ACUTE, GRAVE, -1);
+  test_u32_grapheme_breaks ("#_#", 'e', ACUTE, 'x', -1);
+  test_u32_grapheme_breaks ("#_#_", 'e', ACUTE, 'e', GRAVE, -1);
+
+  return 0;
+}
diff --git a/tests/unigbrk/test-u32-grapheme-len.c b/tests/unigbrk/test-u32-grapheme-len.c
new file mode 100644
index 0000000000..af002f07ef
--- /dev/null
+++ b/tests/unigbrk/test-u32-grapheme-len.c
@@ -0,0 +1,96 @@
+/* Grapheme cluster length test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Verify u32_grapheme_len on a single input.
+
+   The variable arguments are the UTF-32 units of the string, each
+   passed as an int and followed by a terminating -1; at most 32 units
+   are accepted.  LEN is the result expected from u32_grapheme_len.  On
+   a mismatch a diagnostic and a hex dump of the input are written to
+   stderr and the test aborts.  */
+static void
+test_u32_grapheme_len (size_t len, ...)
+{
+  uint32_t units[32];
+  size_t count = 0;
+  size_t actual;
+  size_t k;
+  va_list ap;
+  int unit;
+
+  /* Gather the units that precede the -1 terminator.  */
+  va_start (ap, len);
+  while ((unit = va_arg (ap, int)) != -1)
+    {
+      if (count >= sizeof units / sizeof *units)
+        abort ();
+      units[count] = unit;
+      count++;
+    }
+  va_end (ap);
+
+  actual = u32_grapheme_len (units, count);
+  if (actual == len)
+    return;
+
+  fprintf (stderr, "u32_grapheme_len counted %zu units, expected %zu:",
+           actual, len);
+  for (k = 0; k < count; k++)
+    fprintf (stderr, " %04x", (unsigned int) units[k]);
+  putc ('\n', stderr);
+  abort ();
+}
+
+
+int
+main (void)
+{
+ /* Empty string. In each call the first argument is the expected result
+    of u32_grapheme_len, and -1 terminates the list of UTF-32 units. */
+ test_u32_grapheme_len (0, -1);
+
+ /* Standalone 1-unit graphemes. */
+ test_u32_grapheme_len (1, 'a', -1);
+ test_u32_grapheme_len (1, 'a', 'b', -1);
+ test_u32_grapheme_len (1, 'a', 'b', 'c', -1);
+
+ /* Single code point graphemes outside ASCII; each is still a single
+    UTF-32 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u32_grapheme_len (1, HIRAGANA_A, -1);
+ test_u32_grapheme_len (1, HIRAGANA_A, 'x', -1);
+ test_u32_grapheme_len (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents: expected to be counted with the preceding base
+    character. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u32_grapheme_len (2, 'e', ACUTE, -1);
+ test_u32_grapheme_len (3, 'e', ACUTE, GRAVE, -1);
+ test_u32_grapheme_len (2, 'e', ACUTE, 'x', -1);
+ test_u32_grapheme_len (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Outside BMP: still one UTF-32 unit per code point. */
+#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. */
+ test_u32_grapheme_len (1, NEUTRAL_FACE, -1);
+ test_u32_grapheme_len (2, NEUTRAL_FACE, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u32-grapheme-next.c b/tests/unigbrk/test-u32-grapheme-next.c
new file mode 100644
index 0000000000..0c4017eb41
--- /dev/null
+++ b/tests/unigbrk/test-u32-grapheme-next.c
@@ -0,0 +1,103 @@
+/* Next grapheme cluster test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+/* Check u32_grapheme_next on one input.
+
+   The variable arguments are the UTF-32 units of the string, each
+   passed as an int and followed by a terminating -1; at most 32 units
+   are accepted.  LEN is the number of units by which u32_grapheme_next
+   is expected to advance from the start of the string.  On a mismatch
+   a diagnostic and a hex dump of the input are written to stderr and
+   the test aborts.  */
+static void
+test_u32_grapheme_next (size_t len, ...)
+{
+  const uint32_t *next;
+  uint32_t s[32];
+  va_list args;
+  size_t n;
+
+  /* Collect the units up to the -1 terminator.  */
+  va_start (args, len);
+  n = 0;
+  for (;;)
+    {
+      int unit = va_arg (args, int);
+      if (unit == -1)
+        break;
+      else if (n >= sizeof s / sizeof *s)
+        abort ();
+
+      s[n++] = unit;
+    }
+  va_end (args);
+
+  next = u32_grapheme_next (s, s + n);
+  if (next != s + len)
+    {
+      size_t i;
+
+      if (next == NULL)
+        fputs ("u32_grapheme_next returned NULL", stderr);
+      else
+        /* NEXT - S has type ptrdiff_t; convert it to match %zu.  */
+        fprintf (stderr, "u32_grapheme_next skipped %zu units",
+                 (size_t) (next - s));
+      fprintf (stderr, ", expected %zu:\n", len);
+      for (i = 0; i < n; i++)
+        /* uint32_t need not be unsigned int; convert it to match %x.  */
+        fprintf (stderr, " %04x", (unsigned int) s[i]);
+      putc ('\n', stderr);
+      abort ();
+    }
+}
+
+int
+main (void)
+{
+ static const uint32_t s[] = { 'a', 'b', 'c' };
+
+ /* Empty string: when the start and end positions coincide,
+    u32_grapheme_next is expected to return NULL. */
+ ASSERT (u32_grapheme_next (NULL, NULL) == NULL);
+ ASSERT (u32_grapheme_next (s, s) == NULL);
+
+ /* Standalone 1-unit graphemes. In each call the first argument is the
+    number of UTF-32 units the first grapheme cluster is expected to span,
+    and -1 terminates the list of units. */
+ test_u32_grapheme_next (1, 'a', -1);
+ test_u32_grapheme_next (1, 'a', 'b', -1);
+ test_u32_grapheme_next (1, 'a', 'b', 'c', -1);
+
+ /* Single code point graphemes outside ASCII; each is still a single
+    UTF-32 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u32_grapheme_next (1, HIRAGANA_A, -1);
+ test_u32_grapheme_next (1, HIRAGANA_A, 'x', -1);
+ test_u32_grapheme_next (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents: expected to be grouped with the preceding base
+    character. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u32_grapheme_next (2, 'e', ACUTE, -1);
+ test_u32_grapheme_next (3, 'e', ACUTE, GRAVE, -1);
+ test_u32_grapheme_next (2, 'e', ACUTE, 'x', -1);
+ test_u32_grapheme_next (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Outside BMP: still one UTF-32 unit per code point. */
+#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. */
+ test_u32_grapheme_next (1, NEUTRAL_FACE, -1);
+ test_u32_grapheme_next (2, NEUTRAL_FACE, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u32-grapheme-prev.c b/tests/unigbrk/test-u32-grapheme-prev.c
new file mode 100644
index 0000000000..691ed8055a
--- /dev/null
+++ b/tests/unigbrk/test-u32-grapheme-prev.c
@@ -0,0 +1,105 @@
+/* Previous grapheme cluster test.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+/* Check u32_grapheme_prev on one input.
+
+   The variable arguments are the UTF-32 units of the string, each
+   passed as an int and followed by a terminating -1; at most 16 units
+   are accepted.  LEN is the number of units by which u32_grapheme_prev
+   is expected to step back from the end of the string.  On a mismatch
+   a diagnostic and a hex dump of the input are written to stderr and
+   the test aborts.  */
+static void
+test_u32_grapheme_prev (size_t len, ...)
+{
+  const uint32_t *prev;
+  const uint32_t *end;
+  uint32_t s[16];
+  va_list args;
+  size_t n;
+
+  /* Collect the units up to the -1 terminator.  */
+  va_start (args, len);
+  n = 0;
+  for (;;)
+    {
+      int unit = va_arg (args, int);
+      if (unit == -1)
+        break;
+      else if (n >= sizeof s / sizeof *s)
+        abort ();
+
+      s[n++] = unit;
+    }
+  va_end (args);
+
+  end = s + n;
+  prev = u32_grapheme_prev (end, s);
+  if (prev != end - len)
+    {
+      size_t i;
+
+      if (prev == NULL)
+        fputs ("u32_grapheme_prev returned NULL", stderr);
+      else
+        /* END - PREV has type ptrdiff_t; convert it to match %zu.  */
+        fprintf (stderr, "u32_grapheme_prev skipped %zu units",
+                 (size_t) (end - prev));
+      fprintf (stderr, ", expected %zu:\n", len);
+      for (i = 0; i < n; i++)
+        /* uint32_t need not be unsigned int; convert it to match %x.  */
+        fprintf (stderr, " %04x", (unsigned int) s[i]);
+      putc ('\n', stderr);
+      abort ();
+    }
+}
+
+int
+main (void)
+{
+ static const uint32_t s[] = { 'a', 'b', 'c' };
+
+ /* Empty string: with no units between the start and the end position,
+    u32_grapheme_prev is expected to return NULL. */
+ ASSERT (u32_grapheme_prev (NULL, NULL) == NULL);
+ ASSERT (u32_grapheme_prev (s, s) == NULL);
+
+ /* Standalone 1-unit graphemes. In each call the first argument is the
+    number of UTF-32 units the last grapheme cluster is expected to span,
+    and -1 terminates the list of units. */
+ test_u32_grapheme_prev (1, 'a', -1);
+ test_u32_grapheme_prev (1, 'a', 'b', -1);
+ test_u32_grapheme_prev (1, 'a', 'b', 'c', -1);
+
+ /* Single code point graphemes outside ASCII; each is still a single
+    UTF-32 unit. */
+#define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
+ test_u32_grapheme_prev (1, HIRAGANA_A, -1);
+ test_u32_grapheme_prev (1, HIRAGANA_A, 'x', -1);
+ test_u32_grapheme_prev (1, HIRAGANA_A, HIRAGANA_A, -1);
+
+ /* Combining accents: expected to be grouped with the preceding base
+    character. */
+#define GRAVE 0x0300 /* Combining grave accent. */
+#define ACUTE 0x0301 /* Combining acute accent. */
+ test_u32_grapheme_prev (2, 'e', ACUTE, -1);
+ test_u32_grapheme_prev (3, 'e', ACUTE, GRAVE, -1);
+ test_u32_grapheme_prev (1, 'e', ACUTE, 'x', -1);
+ test_u32_grapheme_prev (2, 'e', ACUTE, 'e', ACUTE, -1);
+
+ /* Outside BMP: still one UTF-32 unit per code point. */
+#define NEUTRAL_FACE 0x1f610 /* 😐: neutral face. */
+ test_u32_grapheme_prev (1, NEUTRAL_FACE, -1);
+ test_u32_grapheme_prev (2, NEUTRAL_FACE, GRAVE, -1);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u8-grapheme-breaks.c b/tests/unigbrk/test-u8-grapheme-breaks.c
new file mode 100644
index 0000000000..3bbebb29bb
--- /dev/null
+++ b/tests/unigbrk/test-u8-grapheme-breaks.c
@@ -0,0 +1,96 @@
+/* Grapheme cluster breaks test.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+
+/* Verify u8_grapheme_breaks against an expected break map.
+
+   S is the UTF-8 input.  EXPECTED holds one character per byte of S:
+   '#' where a grapheme cluster break is expected at that byte, any
+   other character (by convention '_') where it is not; its length also
+   determines how many bytes of S are examined.  On a mismatch the
+   input, the expected map and the actual map are written to stderr and
+   the test aborts.  */
+static void
+test_u8_grapheme_breaks (const uint8_t *s, const char *expected)
+{
+  size_t n = strlen (expected);
+  char *actual = malloc (n);
+  size_t pos;
+  size_t k;
+
+  if (actual == NULL)
+    abort ();
+
+  /* Poison the output so that entries left unwritten show up as
+     mismatches (0xcc is neither 0 nor 1).  */
+  memset (actual, 0xcc, n);
+
+  u8_grapheme_breaks (s, n, actual);
+
+  /* Advance to the first position that disagrees with EXPECTED.  */
+  pos = 0;
+  while (pos < n && actual[pos] == (expected[pos] == '#'))
+    pos++;
+
+  if (pos < n)
+    {
+      fprintf (stderr, "wrong grapheme breaks:\n");
+
+      fprintf (stderr, " input:");
+      for (k = 0; k < n; k++)
+        fprintf (stderr, " %02x", s[k]);
+      putc ('\n', stderr);
+
+      fprintf (stderr, "expected:");
+      for (k = 0; k < n; k++)
+        fprintf (stderr, " %d", expected[k] == '#');
+      putc ('\n', stderr);
+
+      fprintf (stderr, " actual:");
+      for (k = 0; k < n; k++)
+        fprintf (stderr, " %d", actual[k]);
+      putc ('\n', stderr);
+
+      abort ();
+    }
+
+  free (actual);
+}
+
+/* Run the u8_grapheme_breaks test cases.  In each call the first
+   argument is the UTF-8 input and the second is the expected break map
+   ('#' = break, '_' = no break), one character per input byte.  */
+int
+main (void)
+{
+  /* Standalone 1-unit graphemes. */
+  test_u8_grapheme_breaks ("a", "#");
+  test_u8_grapheme_breaks ("ab", "##");
+  test_u8_grapheme_breaks ("abc", "###");
+
+  /* Multi-unit, single code point graphemes. */
+#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */
+  test_u8_grapheme_breaks (HIRAGANA_A, "#__");
+  test_u8_grapheme_breaks (HIRAGANA_A"x", "#__#");
+  test_u8_grapheme_breaks (HIRAGANA_A HIRAGANA_A, "#__#__");
+
+  /* Combining accents: no break is expected before them. */
+#define GRAVE "\314\200" /* Combining grave accent. */
+#define ACUTE "\314\201" /* Combining acute accent. */
+  test_u8_grapheme_breaks ("e"ACUTE, "#__");
+  test_u8_grapheme_breaks ("e"ACUTE GRAVE, "#____");
+  test_u8_grapheme_breaks ("e"ACUTE"x", "#__#");
+  test_u8_grapheme_breaks ("e"ACUTE "e"ACUTE, "#__#__");
+
+  return 0;
+}
diff --git a/tests/unigbrk/test-u8-grapheme-len.c b/tests/unigbrk/test-u8-grapheme-len.c
new file mode 100644
index 0000000000..19b7a543ab
--- /dev/null
+++ b/tests/unigbrk/test-u8-grapheme-len.c
@@ -0,0 +1,52 @@
+/* Grapheme cluster length test.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include "macros.h"
+
+int
+main (void)
+{
+ /* Empty string. In each assertion the second argument is the input
+    length in bytes and the right-hand side is the expected result of
+    u8_grapheme_len. */
+ ASSERT (u8_grapheme_len ("", 0) == 0);
+
+ /* Standalone 1-unit graphemes. */
+ ASSERT (u8_grapheme_len ("a", 1) == 1);
+ ASSERT (u8_grapheme_len ("ab", 2) == 1);
+ ASSERT (u8_grapheme_len ("abc", 3) == 1);
+
+ /* Multi-unit, single code point graphemes. */
+#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */
+ ASSERT (u8_grapheme_len (HIRAGANA_A, 3) == 3);
+ ASSERT (u8_grapheme_len (HIRAGANA_A"x", 4) == 3);
+ ASSERT (u8_grapheme_len (HIRAGANA_A HIRAGANA_A, 6) == 3);
+
+ /* Combining accents: two bytes each, expected to be counted with the
+    preceding base character. */
+#define GRAVE "\314\200" /* Combining grave accent. */
+#define ACUTE "\314\201" /* Combining acute accent. */
+ ASSERT (u8_grapheme_len ("e"ACUTE, 3) == 3);
+ ASSERT (u8_grapheme_len ("e"ACUTE GRAVE, 5) == 5);
+ ASSERT (u8_grapheme_len ("e"ACUTE"x", 4) == 3);
+ ASSERT (u8_grapheme_len ("e"ACUTE "e"ACUTE, 6) == 3);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u8-grapheme-next.c b/tests/unigbrk/test-u8-grapheme-next.c
new file mode 100644
index 0000000000..e67178ec78
--- /dev/null
+++ b/tests/unigbrk/test-u8-grapheme-next.c
@@ -0,0 +1,78 @@
+/* Next grapheme cluster test.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+/* Check u8_grapheme_next on one input.
+
+   S points to N bytes of UTF-8 input.  LEN is the number of bytes by
+   which u8_grapheme_next is expected to advance from S.  On a mismatch
+   a diagnostic and a hex dump of the input are written to stderr and
+   the test aborts.  */
+static void
+test_u8_grapheme_next (const uint8_t *s, size_t n, size_t len)
+{
+  const uint8_t *next = u8_grapheme_next (s, s + n);
+  if (next != s + len)
+    {
+      size_t i;
+
+      if (next == NULL)
+        fputs ("u8_grapheme_next returned NULL", stderr);
+      else
+        /* NEXT - S has type ptrdiff_t; convert it to match %zu.  */
+        fprintf (stderr, "u8_grapheme_next skipped %zu bytes",
+                 (size_t) (next - s));
+      fprintf (stderr, ", expected %zu:\n", len);
+      for (i = 0; i < n; i++)
+        fprintf (stderr, " %02x", s[i]);
+      putc ('\n', stderr);
+      abort ();
+    }
+}
+
+int
+main (void)
+{
+ static const uint8_t s[] = "abc";
+
+ /* Empty string: when the start and end positions coincide,
+    u8_grapheme_next is expected to return NULL. */
+ ASSERT (u8_grapheme_next (NULL, NULL) == NULL);
+ ASSERT (u8_grapheme_next (s, s) == NULL);
+
+ /* Standalone 1-unit graphemes. In each call the arguments are the
+    input, its length in bytes, and the number of bytes the first
+    grapheme cluster is expected to span. */
+ test_u8_grapheme_next ("a", 1, 1);
+ test_u8_grapheme_next ("ab", 2, 1);
+ test_u8_grapheme_next ("abc", 3, 1);
+
+ /* Multi-unit, single code point graphemes. */
+#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */
+ test_u8_grapheme_next (HIRAGANA_A, 3, 3);
+ test_u8_grapheme_next (HIRAGANA_A"x", 4, 3);
+ test_u8_grapheme_next (HIRAGANA_A HIRAGANA_A, 6, 3);
+
+ /* Combining accents: two bytes each, expected to be grouped with the
+    preceding base character. */
+#define GRAVE "\314\200" /* Combining grave accent. */
+#define ACUTE "\314\201" /* Combining acute accent. */
+ test_u8_grapheme_next ("e"ACUTE, 3, 3);
+ test_u8_grapheme_next ("e"ACUTE GRAVE, 5, 5);
+ test_u8_grapheme_next ("e"ACUTE"x", 4, 3);
+ test_u8_grapheme_next ("e"ACUTE "e"ACUTE, 6, 3);
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-u8-grapheme-prev.c b/tests/unigbrk/test-u8-grapheme-prev.c
new file mode 100644
index 0000000000..2f1090131c
--- /dev/null
+++ b/tests/unigbrk/test-u8-grapheme-prev.c
@@ -0,0 +1,79 @@
+/* Previous grapheme cluster test.
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+/* Check u8_grapheme_prev on one input.
+
+   S points to N bytes of UTF-8 input.  LEN is the number of bytes by
+   which u8_grapheme_prev is expected to step back from the end of the
+   input.  On a mismatch a diagnostic and a hex dump of the input are
+   written to stderr and the test aborts.  */
+static void
+test_u8_grapheme_prev (const uint8_t *s, size_t n, size_t len)
+{
+  const uint8_t *end = s + n;
+  const uint8_t *prev = u8_grapheme_prev (end, s);
+  if (prev != end - len)
+    {
+      size_t i;
+
+      if (prev == NULL)
+        fputs ("u8_grapheme_prev returned NULL", stderr);
+      else
+        /* END - PREV has type ptrdiff_t; convert it to match %zu.  */
+        fprintf (stderr, "u8_grapheme_prev skipped %zu bytes",
+                 (size_t) (end - prev));
+      fprintf (stderr, ", expected %zu:\n", len);
+      for (i = 0; i < n; i++)
+        fprintf (stderr, " %02x", s[i]);
+      putc ('\n', stderr);
+      abort ();
+    }
+}
+
+/* Run the u8_grapheme_prev test cases.  In each helper call the
+   arguments are the input, its length in bytes, and the number of bytes
+   the last grapheme cluster is expected to span.  */
+int
+main (void)
+{
+  /* Declared as uint8_t, not char, to match the pointer type that
+     u8_grapheme_prev is called with below.  */
+  static const uint8_t s[] = "abc";
+
+  /* Empty string: with no bytes between the start and the end position,
+     u8_grapheme_prev is expected to return NULL. */
+  ASSERT (u8_grapheme_prev (NULL, NULL) == NULL);
+  ASSERT (u8_grapheme_prev (s, s) == NULL);
+
+  /* Standalone 1-unit graphemes. */
+  test_u8_grapheme_prev ("a", 1, 1);
+  test_u8_grapheme_prev ("ab", 2, 1);
+  test_u8_grapheme_prev ("abc", 3, 1);
+
+  /* Multi-unit, single code point graphemes. */
+#define HIRAGANA_A "\343\201\202" /* あ: Hiragana letter 'a'. */
+  test_u8_grapheme_prev (HIRAGANA_A, 3, 3);
+  test_u8_grapheme_prev (HIRAGANA_A"x", 4, 1);
+  test_u8_grapheme_prev (HIRAGANA_A HIRAGANA_A, 6, 3);
+
+  /* Combining accents: two bytes each, expected to be grouped with the
+     preceding base character. */
+#define GRAVE "\314\200" /* Combining grave accent. */
+#define ACUTE "\314\201" /* Combining acute accent. */
+  test_u8_grapheme_prev ("e"ACUTE, 3, 3);
+  test_u8_grapheme_prev ("e"ACUTE GRAVE, 5, 5);
+  test_u8_grapheme_prev ("e"ACUTE"x", 4, 1);
+  test_u8_grapheme_prev ("e"ACUTE "e"ACUTE, 6, 3);
+
+  return 0;
+}
diff --git a/tests/unigbrk/test-ulc-grapheme-breaks.c b/tests/unigbrk/test-ulc-grapheme-breaks.c
new file mode 100644
index 0000000000..5bb7bdada6
--- /dev/null
+++ b/tests/unigbrk/test-ulc-grapheme-breaks.c
@@ -0,0 +1,86 @@
+/* Grapheme cluster breaks test.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010,
+ based on code by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#include "unigbrk.h"
+
+#include <locale.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+/* Return true unless byte C lies in the range that ISO-8859-6 uses for
+   combining characters, i.e. return whether this test expects a grapheme
+   cluster break at C.  */
+static bool
+is_8859_6_break (unsigned char c)
+{
+  /* ISO-8859-6 has combining characters in positions 0xeb through 0xf2. */
+  if (c < 0xeb)
+    return true;
+  return c > 0xf2;
+}
+
+int
+main ()
+{
+ /* configure should already have checked that the locale is supported.
+    The locale is taken from the environment; if it cannot be set, exit
+    with status 1. */
+ if (setlocale (LC_ALL, "") == NULL)
+ return 1;
+
+ /* Test case n = 0: a zero-length input with NULL buffers. */
+ ulc_grapheme_breaks (NULL, 0, NULL);
+
+#if HAVE_ICONV
+ {
+ /* This is just a random collection of bytes from ISO-8859-6.
+
+ (We use ISO-8859-6 because it is one of very few non-UTF-8 locale
+ encodings supported by glibc that have combining characters.)
+
+ Each output byte is compared with is_8859_6_break applied to the
+ corresponding input byte; on the first mismatch the input, the
+ expected map and the actual map are written to stderr and the test
+ aborts. */
+ static const char s[] = "ZYX\352\353W\360\361V\362";
+ enum { LENGTH = sizeof s - 1 }; /* Excludes the terminating NUL. */
+ char p[LENGTH];
+ size_t i;
+
+ ulc_grapheme_breaks (s, LENGTH, p);
+ for (i = 0; i < LENGTH; i++)
+ if (p[i] != is_8859_6_break (s[i])) /* s[i] converts to unsigned char. */
+ {
+ size_t j;
+
+ fprintf (stderr, "wrong grapheme breaks:\n");
+
+ fprintf (stderr, " input:");
+ for (j = 0; j < LENGTH; j++)
+ fprintf (stderr, " %02x", (unsigned char) s[j]);
+ putc ('\n', stderr);
+
+ fprintf (stderr, "expected:");
+ for (j = 0; j < LENGTH; j++)
+ fprintf (stderr, " %d", is_8859_6_break (s[j]));
+ putc ('\n', stderr);
+
+ fprintf (stderr, " actual:");
+ for (j = 0; j < LENGTH; j++)
+ fprintf (stderr, " %d", p[j]);
+ putc ('\n', stderr);
+
+ abort ();
+ }
+ }
+#endif
+
+ return 0;
+}
diff --git a/tests/unigbrk/test-ulc-grapheme-breaks.sh b/tests/unigbrk/test-ulc-grapheme-breaks.sh
new file mode 100755
index 0000000000..534df61b3c
--- /dev/null
+++ b/tests/unigbrk/test-ulc-grapheme-breaks.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Test in an ISO-8895-6 locale.
+: ${LOCALE_AR=ar_SA}
+if test $LOCALE_AR = none; then
+ if test -f /usr/bin/localedef; then
+ echo "Skipping test: no traditional Arabic locale is installed"
+ else
+ echo "Skipping test: no traditional Arabic locale is supported"
+ fi
+ exit 77
+fi
+
+LC_ALL=$LOCALE_AR \
+./test-ulc-grapheme-breaks${EXEEXT}