summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog23
-rw-r--r--lib/gen-uni-tables.c58
-rw-r--r--lib/unicase/cased.c28
-rw-r--r--lib/unicase/cased.h343
-rw-r--r--lib/unicase/ignorable.c47
-rw-r--r--lib/unicase/ignorable.h544
-rw-r--r--modules/unicase/cased7
-rw-r--r--modules/unicase/ignorable5
-rw-r--r--tests/unicase/test-u16-tolower.c61
-rw-r--r--tests/unicase/test-u32-tolower.c61
-rw-r--r--tests/unicase/test-u8-tolower.c61
11 files changed, 1216 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 659a289c47..108e19b737 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,28 @@
2009-06-30 Bruno Haible <bruno@clisp.org>
+ Fix evaluation of "Before C" condition of FINAL_SIGMA.
+ * lib/gen-uni-tables.c (is_cased, is_case_ignorable): New functions.
+ (output_casing_properties): New function.
+ (main): Call it.
+ * lib/unicase/cased.h: New file, generated by gen-uni-tables.
+ * lib/unicase/cased.c: Include unictype/bitmap.h.
+ (uc_is_cased): Define through a bitmap lookup.
+ * lib/unicase/ignorable.h: New file, generated by gen-uni-tables.
+ * lib/unicase/ignorable.c: Include unictype/bitmap.h.
+ (uc_is_case_ignorable): Define through a bitmap lookup.
+ * modules/unicase/cased (Files): Add lib/unicase/cased.h,
+ lib/unictype/bitmap.h.
+ (Depends-on): Add inline. Clean up.
+ * modules/unicase/ignorable (Files): Add lib/unicase/ignorable.h,
+ lib/unictype/bitmap.h.
+ (Depends-on): Add inline. Clean up.
+ * tests/unicase/test-u8-tolower.c (main): Add more tests of FINAL_SIGMA
+ recognition.
+ * tests/unicase/test-u16-tolower.c (main): Likewise.
+ * tests/unicase/test-u32-tolower.c (main): Likewise.
+
+2009-06-30 Bruno Haible <bruno@clisp.org>
+
* lib/unicase/u8-casemap.c: Don't include uniwbrk.h.
* lib/unicase/u16-casemap.c: Likewise.
* lib/unicase/u32-casemap.c: Likewise.
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index a50751758c..94752b2519 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -8210,6 +8210,63 @@ output_casing_rules (const char *filename, const char *version)
/* ========================================================================= */
+/* Quoting the Unicode standard:
+ Definition: A character is defined to be "cased" if it has the Lowercase
+ or Uppercase property or has a General_Category value of
+ Titlecase_Letter. */
+static bool
+is_cased (unsigned int ch)
+{
+  /* Either of the Lowercase / Uppercase properties settles the question;
+     otherwise only a titlecase letter (category Lt) still counts.  */
+  if (is_property_lowercase (ch) || is_property_uppercase (ch))
+    return true;
+  return is_category_Lt (ch);
+}
+
+/* Quoting the Unicode standard:
+ Definition: A character is defined to be "case-ignorable" if it has the
+ value MidLetter {or the value MidNumLet} for the Word_Break property or
+ its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
+ Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
+ The text marked in braces was added in Unicode 5.1.0, see
+ <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
+ Definition of case-ignorable". */
+/* Since this predicate is only used for the "Before C" and "After C"
+ conditions of FINAL_SIGMA, we exclude the "cased" characters here.
+ This simplifies the evaluation of the regular expressions
+ \p{cased} (\p{case-ignorable})* C
+ and
+ C (\p{case-ignorable})* \p{cased}
+ */
+static bool
+is_case_ignorable (unsigned int ch)
+{
+  /* Reject everything that is neither MidLetter/MidNumLet by its word break
+     property nor in one of the general categories Mn, Me, Cf, Lm, Sk.  */
+  if (unicode_org_wbp[ch] != WBP_MIDLETTER
+      && unicode_org_wbp[ch] != WBP_MIDNUMLET
+      && !is_category_Mn (ch)
+      && !is_category_Me (ch)
+      && !is_category_Cf (ch)
+      && !is_category_Lm (ch)
+      && !is_category_Sk (ch))
+    return false;
+
+  /* Cased characters are deliberately kept out of the set.  */
+  return !is_cased (ch);
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Output all case related properties. */
+static void
+output_casing_properties (const char *version)
+{
+  /* Property "cased": debug listing, generated test, lookup table.  */
+  debug_output_predicate ("unicase/cased.txt", is_cased);
+  output_predicate_test ("../tests/unicase/test-cased.c", is_cased,
+                         "uc_is_cased (c)");
+  output_predicate ("unicase/cased.h", is_cased,
+                    "u_casing_property_cased", "Casing Properties", version);
+
+  /* Property "case_ignorable": the same three outputs, written to files
+     named "ignorable".  */
+  debug_output_predicate ("unicase/ignorable.txt", is_case_ignorable);
+  output_predicate_test ("../tests/unicase/test-ignorable.c",
+                         is_case_ignorable, "uc_is_case_ignorable (c)");
+  output_predicate ("unicase/ignorable.h", is_case_ignorable,
+                    "u_casing_property_case_ignorable", "Casing Properties",
+                    version);
+}
+
+/* ========================================================================= */
+
int
main (int argc, char * argv[])
{
@@ -8302,6 +8359,7 @@ main (int argc, char * argv[])
output_simple_mapping ("unicase/totitle.h", to_title, version);
output_simple_mapping ("unicase/tocasefold.h", to_casefold, version);
output_casing_rules ("unicase/special-casing-table.gperf", version);
+ output_casing_properties (version);
return 0;
}
diff --git a/lib/unicase/cased.c b/lib/unicase/cased.c
index 1dc19c5001..5fbf4cb745 100644
--- a/lib/unicase/cased.c
+++ b/lib/unicase/cased.c
@@ -1,5 +1,5 @@
/* Test whether a Unicode character is cased.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -20,16 +20,34 @@
/* Specification. */
#include "caseprop.h"
+/* Quoting the Unicode standard:
+ Definition: A character is defined to be "cased" if it has the Lowercase
+ or Uppercase property or has a General_Category value of
+ Titlecase_Letter. */
+
+#if 0
+
#include "unictype.h"
bool
uc_is_cased (ucs4_t uc)
{
- /* Quoting the Unicode standard:
- Definition: A character is defined to be "cased" if it has the Lowercase
- or Uppercase property or has a General_Category value of
- Titlecase_Letter. */
return (uc_is_property_lowercase (uc)
|| uc_is_property_uppercase (uc)
|| uc_is_general_category (uc, UC_TITLECASE_LETTER));
}
+
+#else
+
+#include "unictype/bitmap.h"
+
+/* Define u_casing_property_cased table. */
+#include "cased.h"
+
+bool
+uc_is_cased (ucs4_t uc)
+{
+  /* A single lookup in the generated u_casing_property_cased table
+     answers the question.  */
+  const bool cased = bitmap_lookup (&u_casing_property_cased, uc);
+
+  return cased;
+}
+
+#endif
diff --git a/lib/unicase/cased.h b/lib/unicase/cased.h
new file mode 100644
index 0000000000..ea2c2eaa8d
--- /dev/null
+++ b/lib/unicase/cased.h
@@ -0,0 +1,343 @@
+/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
+/* Casing Properties of Unicode characters. */
+/* Generated automatically by gen-ctype.c for Unicode 5.1.0. */
+#define header_0 16
+#define header_2 9
+#define header_3 127
+#define header_4 15
+static const
+struct
+ {
+ int header[1];
+ int level1[2];
+ short level2[2 << 7];
+ /*unsigned*/ int level3[15 << 4];
+ }
+u_casing_property_cased =
+{
+ { 2 },
+ {
+ 3 * sizeof (int) / sizeof (short) + 0,
+ 3 * sizeof (int) / sizeof (short) + 128
+ },
+ {
+ 3 + 256 * sizeof (short) / sizeof (int) + 0,
+ 3 + 256 * sizeof (short) / sizeof (int) + 16,
+ 3 + 256 * sizeof (short) / sizeof (int) + 32,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 48,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 64,
+ 3 + 256 * sizeof (short) / sizeof (int) + 80,
+ 3 + 256 * sizeof (short) / sizeof (int) + 96,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 112,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 128,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 144,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 160,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 176,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 192,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 3 + 256 * sizeof (short) / sizeof (int) + 208,
+ 3 + 256 * sizeof (short) / sizeof (int) + 224,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1
+ },
+ {
+ 0x00000000, 0x00000000, 0x07FFFFFE, 0x07FFFFFE,
+ 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xF7FFFFFF, 0xFFFFFFF0, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFEFFFFF, 0x01FFFFFF, 0x00000003, 0x0000001F,
+ 0x00000000, 0x00000000, 0x00000020, 0x3CCF0000,
+ 0xFFFFD740, 0xFFFFFFFB, 0xFFFFFFFF, 0xFFBFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFC03, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFE000F, 0x007FFFFF, 0xFFFFFFFE,
+ 0x000000FF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0xFFFFFFFF, 0x0000003F, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
+ 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x80020000,
+ 0x001F0000, 0x00000000, 0x00000000, 0x00000000,
+ 0x3E2FFC84, 0xF21FBD50, 0x000043E0, 0xFFFFFFFF,
+ 0x00000018, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0xFFC00000, 0xFFFFFFFF, 0x000003FF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFF7FFF, 0x7FFFFFFF, 0x3FFEFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000001F,
+ 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00003FFC,
+ 0x00FFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x000018FF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00F8007F, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
+ 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
+ 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFF3F, 0xF7FFFFFD, 0xF7FFFFFF,
+ 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
+ 0xFFFFFDFF, 0xFFFFFDFF, 0x00000FF7, 0x00000000
+ }
+};
diff --git a/lib/unicase/ignorable.c b/lib/unicase/ignorable.c
index 609c3d394f..84b2634ccb 100644
--- a/lib/unicase/ignorable.c
+++ b/lib/unicase/ignorable.c
@@ -1,5 +1,5 @@
/* Test whether a Unicode character is case-ignorable.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -20,21 +20,30 @@
/* Specification. */
#include "caseprop.h"
+/* Quoting the Unicode standard:
+ Definition: A character is defined to be "case-ignorable" if it has the
+ value MidLetter {or the value MidNumLet} for the Word_Break property or
+ its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
+ Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
+ The text marked in braces was added in Unicode 5.1.0, see
+ <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
+ Definition of case-ignorable". */
+/* Since this predicate is only used for the "Before C" and "After C"
+ conditions of FINAL_SIGMA, we exclude the "cased" characters here.
+ This simplifies the evaluation of the regular expressions
+ \p{cased} (\p{case-ignorable})* C
+ and
+ C (\p{case-ignorable})* \p{cased}
+ */
+
+#if 0
+
#include "unictype.h"
#include "uniwbrk.h"
bool
uc_is_case_ignorable (ucs4_t uc)
{
- /* Quoting the Unicode standard:
- Definition: A character is defined to be "case-ignorable" if it has the
- value MidLetter {or the value MidNumLet} for the Word_Break property or
- its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
- Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
- The text marked in braces was added in Unicode 5.1.0, see
- <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
- Definition of case-ignorable". */
-
int wbp = uc_wordbreak_property (uc);
return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
@@ -42,5 +51,21 @@ uc_is_case_ignorable (ucs4_t uc)
| UC_CATEGORY_MASK_Me
| UC_CATEGORY_MASK_Cf
| UC_CATEGORY_MASK_Lm
- | UC_CATEGORY_MASK_Sk));
+ | UC_CATEGORY_MASK_Sk))
+ && !uc_is_cased (uc);
+}
+
+#else
+
+#include "unictype/bitmap.h"
+
+/* Define u_casing_property_case_ignorable table. */
+#include "ignorable.h"
+
+bool
+uc_is_case_ignorable (ucs4_t uc)
+{
+  /* A single lookup in the generated u_casing_property_case_ignorable
+     table answers the question.  */
+  const bool ignorable = bitmap_lookup (&u_casing_property_case_ignorable, uc);
+
+  return ignorable;
}
+
+#endif
diff --git a/lib/unicase/ignorable.h b/lib/unicase/ignorable.h
new file mode 100644
index 0000000000..3b29290f82
--- /dev/null
+++ b/lib/unicase/ignorable.h
@@ -0,0 +1,544 @@
+/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
+/* Casing Properties of Unicode characters. */
+/* Generated automatically by gen-ctype.c for Unicode 5.1.0. */
+#define header_0 16
+#define header_2 9
+#define header_3 127
+#define header_4 15
+static const
+struct
+ {
+ int header[1];
+ int level1[15];
+ short level2[3 << 7];
+ /*unsigned*/ int level3[30 << 4];
+ }
+u_casing_property_case_ignorable =
+{
+ { 15 },
+ {
+ 16 * sizeof (int) / sizeof (short) + 0,
+ 16 * sizeof (int) / sizeof (short) + 128,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 * sizeof (int) / sizeof (short) + 256
+ },
+ {
+ 16 + 384 * sizeof (short) / sizeof (int) + 0,
+ 16 + 384 * sizeof (short) / sizeof (int) + 16,
+ 16 + 384 * sizeof (short) / sizeof (int) + 32,
+ 16 + 384 * sizeof (short) / sizeof (int) + 48,
+ 16 + 384 * sizeof (short) / sizeof (int) + 64,
+ 16 + 384 * sizeof (short) / sizeof (int) + 80,
+ 16 + 384 * sizeof (short) / sizeof (int) + 96,
+ 16 + 384 * sizeof (short) / sizeof (int) + 112,
+ 16 + 384 * sizeof (short) / sizeof (int) + 128,
+ 16 + 384 * sizeof (short) / sizeof (int) + 144,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 160,
+ 16 + 384 * sizeof (short) / sizeof (int) + 176,
+ 16 + 384 * sizeof (short) / sizeof (int) + 192,
+ 16 + 384 * sizeof (short) / sizeof (int) + 208,
+ 16 + 384 * sizeof (short) / sizeof (int) + 224,
+ 16 + 384 * sizeof (short) / sizeof (int) + 240,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 256,
+ 16 + 384 * sizeof (short) / sizeof (int) + 272,
+ 16 + 384 * sizeof (short) / sizeof (int) + 288,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 304,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 320,
+ 16 + 384 * sizeof (short) / sizeof (int) + 336,
+ 16 + 384 * sizeof (short) / sizeof (int) + 352,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 368,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 384,
+ 16 + 384 * sizeof (short) / sizeof (int) + 400,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 416,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 432,
+ 16 + 384 * sizeof (short) / sizeof (int) + 448,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 16 + 384 * sizeof (short) / sizeof (int) + 464,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1
+ },
+ {
+ 0x00000000, 0x04004080, 0x40000000, 0x00000001,
+ 0x00000000, 0x0190A100, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0xFE000000, 0xFFFFFFFC, 0xFFFFFFE0,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFDF, 0x0030FFFF,
+ 0x000000B0, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x000003F8, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x02000000, 0x00000000,
+ 0xFFFE0000, 0xBFFFFFFF, 0x000000B6, 0x00100000,
+ 0x07FF000F, 0x00000000, 0x7FFFF801, 0x00010000,
+ 0x00000000, 0x00000000, 0xFFC00000, 0x00003DFF,
+ 0x00028000, 0xFFFF0000, 0x000007FF, 0x00000000,
+ 0x00000000, 0x0001FFC0, 0x00000000, 0x043FF800,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000006, 0x10000000, 0x001E21FE, 0x0002000C,
+ 0x00000002, 0x10000000, 0x0000201E, 0x0000000C,
+ 0x00000006, 0x10000000, 0x00023986, 0x00230000,
+ 0x00000006, 0x10000000, 0x000021BE, 0x0000000C,
+ 0x00000002, 0x90000000, 0x0040201E, 0x0000000C,
+ 0x00000004, 0x00000000, 0x00002001, 0x00000000,
+ 0x00000000, 0xC0000000, 0x00603DC1, 0x0000000C,
+ 0x00000000, 0x90000000, 0x00003040, 0x0000000C,
+ 0x00000000, 0x00000000, 0x0000201E, 0x0000000C,
+ 0x00000000, 0x00000000, 0x005C0400, 0x00000000,
+ 0x00000000, 0x07F20000, 0x00007FC0, 0x00000000,
+ 0x00000000, 0x1BF20000, 0x00003F40, 0x00000000,
+ 0x03000000, 0x02A00000, 0x00000000, 0x7FFE0000,
+ 0xFEFF00DF, 0x1FFFFFFF, 0x00000040, 0x00000000,
+ 0x00000000, 0x66FDE000, 0xC3000000, 0x001E0001,
+ 0x00002064, 0x00000000, 0x00000000, 0x10000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x80000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x001C0000, 0x001C0000, 0x000C0000, 0x000C0000,
+ 0x00000000, 0x3FB00000, 0x208FFE40, 0x00000000,
+ 0x00003800, 0x00000000, 0x00000008, 0x00000000,
+ 0x00000000, 0x00000200, 0x00000000, 0x00000000,
+ 0x00000000, 0x0E040187, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x01800000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x0000000F, 0x17D00000, 0x00000004, 0x000FF800,
+ 0x00000003, 0x0000033C, 0x00000000, 0x00000000,
+ 0x00000000, 0x00CFF000, 0x00000000, 0x3F000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xC000007F,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0xA0000000, 0xE000E003, 0x6000E000,
+ 0x0300F800, 0x00007C90, 0x00000000, 0x0000FC1F,
+ 0x00000000, 0x00000000, 0xFFFF0000, 0x0001FFFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00008000,
+ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF,
+ 0x00000000, 0x00008000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000020, 0x083EFC00, 0x00000000, 0x00000000,
+ 0x7E000000, 0x00000000, 0x00000000, 0x70000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00200000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00001000, 0x00000000, 0x00000000, 0xB0078000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0x00000003, 0x00000000, 0x00000000,
+ 0x00000700, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000844, 0x00000060, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000010, 0x00000000,
+ 0x00000000, 0x00003FC0, 0x0003FF80, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00667E00, 0x00001008, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x40000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x0008FFFF, 0x0000007F, 0x00240000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x80000000,
+ 0x04004080, 0x40000000, 0x00000001, 0x00010000,
+ 0xC0000000, 0x00000000, 0x00000000, 0x0E000008,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x20000000,
+ 0x0000F06E, 0x87000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0xFFF80380,
+ 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x0000001C, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF
+ }
+};
diff --git a/modules/unicase/cased b/modules/unicase/cased
index e679235b9c..f2b1843647 100644
--- a/modules/unicase/cased
+++ b/modules/unicase/cased
@@ -4,14 +4,13 @@ Test whether a Unicode character is "cased".
Files:
lib/unicase/caseprop.h
lib/unicase/cased.c
+lib/unicase/cased.h
+lib/unictype/bitmap.h
Depends-on:
-unictype/category-test
-unictype/category-Lt
-unictype/property-lowercase
-unictype/property-uppercase
unitypes
stdbool
+inline
configure.ac:
diff --git a/modules/unicase/ignorable b/modules/unicase/ignorable
index 7ea017fd64..5d316823d1 100644
--- a/modules/unicase/ignorable
+++ b/modules/unicase/ignorable
@@ -4,12 +4,13 @@ Test whether a Unicode character is "case-ignorable".
Files:
lib/unicase/caseprop.h
lib/unicase/ignorable.c
+lib/unicase/ignorable.h
+lib/unictype/bitmap.h
Depends-on:
-uniwbrk/wordbreak-property
-unictype/category-of
unitypes
stdbool
+inline
configure.ac:
diff --git a/tests/unicase/test-u16-tolower.c b/tests/unicase/test-u16-tolower.c
index 406e45a70c..5ec764c4ad 100644
--- a/tests/unicase/test-u16-tolower.c
+++ b/tests/unicase/test-u16-tolower.c
@@ -185,6 +185,67 @@ main ()
};
ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
}
+ { /* "Σ" -> "σ" */
+ static const uint16_t input[] = { 0x03A3 };
+ static const uint16_t casemapped[] = { 0x03C3 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ" -> "ας" */
+ static const uint16_t input[] = { 0x0391, 0x03A3 };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if not followed by a case-ignorable sequence and
+ then a cased letter. Note that U+0345 and U+037A are simultaneously
+ case-ignorable and cased (which is a bit paradoxical). */
+ { /* "ΑΣΑ" -> "ασα" */
+ static const uint16_t input[] = { 0x0391, 0x03A3, 0x0391 };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:" -> "ας:" */
+ static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C2, 0x003A };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:Α" -> "ασ:α" */
+ static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ" -> "ασ:ͺ" */
+ static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ " -> "ασ:ͺ " */
+ static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 };
+ static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if preceded by a cased letter, optionally followed
+ by a case-ignorable sequence. Note that U+0345 and U+037A are simultaneously
+ case-ignorable and cased (which is a bit paradoxical). */
+ { /* ":Σ" -> ":σ" */
+ static const uint16_t input[] = { 0x003A, 0x03A3 };
+ static const uint16_t casemapped[] = { 0x003A, 0x03C3 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "Α:Σ" -> "α:ς" */
+ static const uint16_t input[] = { 0x0391, 0x003A, 0x03A3 };
+ static const uint16_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ͺ:Σ" -> "ͺ:ς" */
+ static const uint16_t input[] = { 0x037A, 0x003A, 0x03A3 };
+ static const uint16_t casemapped[] = { 0x037A, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* " ͺ:Σ" -> " ͺ:ς" */
+ static const uint16_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 };
+ static const uint16_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
return 0;
}
diff --git a/tests/unicase/test-u32-tolower.c b/tests/unicase/test-u32-tolower.c
index a4318787b2..7f348da484 100644
--- a/tests/unicase/test-u32-tolower.c
+++ b/tests/unicase/test-u32-tolower.c
@@ -185,6 +185,67 @@ main ()
};
ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
}
+ { /* "Σ" -> "σ" */
+ static const uint32_t input[] = { 0x03A3 };
+ static const uint32_t casemapped[] = { 0x03C3 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ" -> "ας" */
+ static const uint32_t input[] = { 0x0391, 0x03A3 };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if not followed by a case-ignorable sequence and
+ then a cased letter. Note that U+0345 and U+037A are simultaneously
+ case-ignorable and cased (which is a bit paradoxical). */
+ { /* "ΑΣΑ" -> "ασα" */
+ static const uint32_t input[] = { 0x0391, 0x03A3, 0x0391 };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:" -> "ας:" */
+ static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C2, 0x003A };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:Α" -> "ασ:α" */
+ static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ" -> "ασ:ͺ" */
+ static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ " -> "ασ:ͺ " */
+ static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 };
+ static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if preceded by a cased letter, optionally followed
+ by a case-ignorable sequence. Note that U+0345 and U+037A are simultaneously
+ case-ignorable and cased (which is a bit paradoxical). */
+ { /* ":Σ" -> ":σ" */
+ static const uint32_t input[] = { 0x003A, 0x03A3 };
+ static const uint32_t casemapped[] = { 0x003A, 0x03C3 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "Α:Σ" -> "α:ς" */
+ static const uint32_t input[] = { 0x0391, 0x003A, 0x03A3 };
+ static const uint32_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ͺ:Σ" -> "ͺ:ς" */
+ static const uint32_t input[] = { 0x037A, 0x003A, 0x03A3 };
+ static const uint32_t casemapped[] = { 0x037A, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* " ͺ:Σ" -> " ͺ:ς" */
+ static const uint32_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 };
+ static const uint32_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
return 0;
}
diff --git a/tests/unicase/test-u8-tolower.c b/tests/unicase/test-u8-tolower.c
index 1b43a1b746..6c0a5df610 100644
--- a/tests/unicase/test-u8-tolower.c
+++ b/tests/unicase/test-u8-tolower.c
@@ -191,6 +191,67 @@ main ()
};
ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
}
+ { /* "Σ" -> "σ" */
+ static const uint8_t input[] = { 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0xCF, 0x83 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ" -> "ας" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if not followed by a (possibly empty)
+ case-ignorable sequence and then a cased letter. Note that U+0345 and U+037A
+ are simultaneously case-ignorable and cased (which is a bit paradoxical). */
+ { /* "ΑΣΑ" -> "ασα" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0xCE, 0x91 };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0xCE, 0xB1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:" -> "ας:" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82, 0x3A };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:Α" -> "ασ:α" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCE, 0x91 };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCE, 0xB1 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ" -> "ασ:ͺ" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ΑΣ:ͺ " -> "ασ:ͺ " */
+ static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA, 0x20 };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA, 0x20 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ /* It's a final sigma only if preceded by a cased letter, optionally with a
+ case-ignorable sequence in between. Note that U+0345 and U+037A are
+ simultaneously case-ignorable and cased (which is a bit paradoxical). */
+ { /* ":Σ" -> ":σ" */
+ static const uint8_t input[] = { 0x3A, 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0x3A, 0xCF, 0x83 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "Α:Σ" -> "α:ς" */
+ static const uint8_t input[] = { 0xCE, 0x91, 0x3A, 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0xCE, 0xB1, 0x3A, 0xCF, 0x82 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* "ͺ:Σ" -> "ͺ:ς" */
+ static const uint8_t input[] = { 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0xCD, 0xBA, 0x3A, 0xCF, 0x82 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
+ { /* " ͺ:Σ" -> " ͺ:ς" */
+ static const uint8_t input[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 };
+ static const uint8_t casemapped[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCF, 0x82 };
+ ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0);
+ }
return 0;
}