summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog15
-rw-r--r--locale/C-collate.c130
-rw-r--r--locale/Makefile2
-rw-r--r--locale/categories.def29
-rw-r--r--locale/elem-hash.h34
-rw-r--r--locale/langinfo.h3
-rw-r--r--locale/programs/ld-collate.c119
-rw-r--r--posix/regex.c168
8 files changed, 356 insertions, 144 deletions
diff --git a/ChangeLog b/ChangeLog
index 59846bf425..47132bdea2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+1999-12-31 Ulrich Drepper <drepper@cygnus.com>
+
+ * locale/langinfo.h: Add constants for wide character collation
+ symbol table.
+ * locale/categories.def: Add appropriate entries for collate symbol
+ table entries.
+ * locale/C-collate.c: Add initializers for new entries.
+ Remove commented out code.
+ * locale/elem-hash.h: New file.
+ * locale/Makefile (distribute): Add elem-hash.h.
+ * locale/programs/ld-collate.c: Implement output of collate symbol
+ table.
+
+ * posix/regex.c: Implement collation class handling.
+
1999-12-30 Ulrich Drepper <drepper@cygnus.com>
* posix/regex.c (regex_compile): Implement equivalence class handling.
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 7302682b30..679ed30871 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -20,137 +20,12 @@
#include <endian.h>
#include "localeinfo.h"
-#if 0
-/* These tables' entries contain values which make the function behave
- according to POSIX.2 Table 2-8 ``LC_COLLATE Category Definition in
- the POSIX Locale''. */
-
-const uint32_t _nl_C_LC_COLLATE_symbol_hash[446] =
-{
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x00000154u, 0x00000060u, 0xffffffffu, 0xffffffffu,
- 0x0000004fu, 0x0000001au, 0x00000085u, 0x00000030u, 0xffffffffu, 0xffffffffu,
- 0x000002beu, 0x000000fau, 0xffffffffu, 0xffffffffu, 0x0000014eu, 0x0000005eu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000bbu, 0x00000044u,
- 0xffffffffu, 0xffffffffu, 0x000000efu, 0x0000004cu, 0x00000147u, 0x0000005cu,
- 0x000000a0u, 0x0000003eu, 0x00000000u, 0x00000000u, 0x00000038u, 0x00000016u,
- 0x00000094u, 0x00000038u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x00000140u, 0x0000005au, 0x0000018cu, 0x00000076u,
- 0x0000007du, 0x0000002cu, 0xffffffffu, 0xffffffffu, 0x00000115u, 0x00000052u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000285u, 0x000000deu,
- 0x00000171u, 0x0000006cu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0x00000289u, 0x000000e2u, 0x000002d8u, 0x000000feu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000022u, 0x00000010u,
- 0x0000028fu, 0x000000e8u, 0x00000069u, 0x00000022u, 0x0000006du, 0x00000024u,
- 0x00000071u, 0x00000026u, 0x00000075u, 0x00000028u, 0xffffffffu, 0xffffffffu,
- 0x00000295u, 0x000000eeu, 0xffffffffu, 0xffffffffu, 0x00000297u, 0x000000f0u,
- 0xffffffffu, 0xffffffffu, 0x00000299u, 0x000000f2u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000213u, 0x000000b6u,
- 0xffffffffu, 0xffffffffu, 0x00000014u, 0x0000000au, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x00000227u, 0x000000b8u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x0000015du, 0x00000064u,
- 0xffffffffu, 0xffffffffu, 0x000001ffu, 0x000000a2u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x0000013au, 0x00000058u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000010u, 0x00000008u,
- 0x000001dfu, 0x00000082u, 0x000001e1u, 0x00000084u, 0x00000167u, 0x00000068u,
- 0x00000004u, 0x00000002u, 0x000001e7u, 0x0000008au, 0x00000186u, 0x00000074u,
- 0x000001ebu, 0x0000008eu, 0x000001edu, 0x00000090u, 0x000001efu, 0x00000092u,
- 0x000001f1u, 0x00000094u, 0x000001f3u, 0x00000096u, 0x000001f5u, 0x00000098u,
- 0x000001f7u, 0x0000009au, 0x000001f9u, 0x0000009cu, 0x000001a5u, 0x0000007au,
- 0x000001fdu, 0x000000a0u, 0x00000030u, 0x00000014u, 0x00000201u, 0x000000a4u,
- 0x00000203u, 0x000000a6u, 0x00000205u, 0x000000a8u, 0x00000207u, 0x000000aau,
- 0x00000209u, 0x000000acu, 0x0000020bu, 0x000000aeu, 0x0000020du, 0x000000b0u,
- 0x0000020fu, 0x000000b2u, 0x00000211u, 0x000000b4u, 0xffffffffu, 0xffffffffu,
- 0x0000009cu, 0x0000003cu, 0xffffffffu, 0xffffffffu, 0x00000098u, 0x0000003au,
- 0x0000016cu, 0x0000006au, 0xffffffffu, 0xffffffffu, 0x00000269u, 0x000000c2u,
- 0x0000026bu, 0x000000c4u, 0x0000026du, 0x000000c6u, 0x0000026fu, 0x000000c8u,
- 0x00000271u, 0x000000cau, 0x00000273u, 0x000000ccu, 0x00000275u, 0x000000ceu,
- 0x00000277u, 0x000000d0u, 0x00000279u, 0x000000d2u, 0x0000027bu, 0x000000d4u,
- 0x0000027du, 0x000000d6u, 0x0000027fu, 0x000000d8u, 0x00000281u, 0x000000dau,
- 0x00000283u, 0x000000dcu, 0x00000090u, 0x00000036u, 0x00000287u, 0x000000e0u,
- 0x0000005fu, 0x0000001cu, 0x0000028bu, 0x000000e4u, 0x0000028du, 0x000000e6u,
- 0x00000089u, 0x00000032u, 0x000001c3u, 0x0000007eu, 0x00000293u, 0x000000ecu,
- 0x00000062u, 0x0000001eu, 0x000001b1u, 0x0000007cu, 0x00000130u, 0x00000056u,
- 0x0000029bu, 0x000000f4u, 0x00000196u, 0x00000078u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x00000081u, 0x0000002eu, 0x00000251u, 0x000000beu,
- 0x00000079u, 0x0000002au, 0x0000029du, 0x000000f6u, 0xffffffffu, 0xffffffffu,
- 0x0000025cu, 0x000000c0u, 0xffffffffu, 0xffffffffu, 0x0000002cu, 0x00000012u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000a4u, 0x00000040u,
- 0xffffffffu, 0xffffffffu, 0x000002b0u, 0x000000f8u, 0xffffffffu, 0xffffffffu,
- 0x000000f9u, 0x0000004eu, 0xffffffffu, 0xffffffffu, 0x0000001cu, 0x0000000eu,
- 0xffffffffu, 0xffffffffu, 0x0000017bu, 0x00000070u, 0x0000000cu, 0x00000006u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000001e3u, 0x00000086u,
- 0xffffffffu, 0xffffffffu, 0x000001e5u, 0x00000088u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x000001d1u, 0x00000080u, 0x000001e9u, 0x0000008cu,
- 0x0000008cu, 0x00000034u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0x00000291u, 0x000000eau, 0xffffffffu, 0xffffffffu,
- 0x00000008u, 0x00000004u, 0xffffffffu, 0xffffffffu, 0x00000181u, 0x00000072u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000231u, 0x000000bau,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000cau, 0x00000046u,
- 0x00000246u, 0x000000bcu, 0xffffffffu, 0xffffffffu, 0x000001fbu, 0x0000009eu,
- 0x000000d6u, 0x00000048u, 0x00000018u, 0x0000000cu, 0xffffffffu, 0xffffffffu,
- 0x00000159u, 0x00000062u, 0xffffffffu, 0xffffffffu, 0x000000aau, 0x00000042u,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000e2u, 0x0000004au,
- 0x00000175u, 0x0000006eu, 0xffffffffu, 0xffffffffu, 0x00000104u, 0x00000050u,
- 0x00000065u, 0x00000020u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000002d2u, 0x000000fcu,
- 0xffffffffu, 0xffffffffu, 0x00000161u, 0x00000066u, 0x00000045u, 0x00000018u,
- 0xffffffffu, 0xffffffffu, 0x00000127u, 0x00000054u, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
- 0xffffffffu, 0xffffffffu
-};
-
-const char _nl_C_LC_COLLATE_symbol_strings[732] =
- "NUL\0" "SOH\0" "STX\0" "ETX\0" "EOT\0" "ENQ\0" "ACK\0" "alert\0"
- "backspace\0" "tab\0" "newline\0" "vertical-tab\0" "form-feed\0"
- "carriage-return\0" "SI\0" "SO\0" "DLE\0" "DC1\0" "DC2\0" "DC3\0" "DC4\0"
- "NAK\0" "SYN\0" "ETB\0" "CAN\0" "EM\0" "SUB\0" "ESC\0" "IS4\0" "IS3\0"
- "IS2\0" "IS1\0" "space\0" "exclamation-mark\0" "quotation-mark\0"
- "number-sign\0" "dollar-sign\0" "percent-sign\0" "ampersand\0"
- "apostrophe\0" "left-parenthesis\0" "right-parenthesis\0" "asterisk\0"
- "plus-sign\0" "comma\0" "hyphen\0" "period\0" "slash\0" "zero\0" "one\0"
- "two\0" "three\0" "four\0" "five\0" "six\0" "seven\0" "eight\0" "nine\0"
- "colon\0" "semicolon\0" "less-than-sign\0" "equals-sign\0"
- "greater-than-sign\0" "question-mark\0" "commercial-at\0" "A\0" "B\0" "C\0"
- "D\0" "E\0" "F\0" "G\0" "H\0" "I\0" "J\0" "K\0" "L\0" "M\0" "N\0" "O\0"
- "P\0" "Q\0" "R\0" "S\0" "T\0" "U\0" "V\0" "W\0" "X\0" "Y\0" "Z\0"
- "left-square-bracket\0" "backslash\0" "right-square-bracket\0"
- "circumflex\0" "underscore\0" "grave-accent\0" "a\0" "b\0" "c\0" "d\0" "e\0"
- "f\0" "g\0" "h\0" "i\0" "j\0" "k\0" "l\0" "m\0" "n\0" "o\0" "p\0" "q\0"
- "r\0" "s\0" "t\0" "u\0" "v\0" "w\0" "x\0" "y\0" "z\0" "left-curly-bracket\0"
- "vertical-line\0" "right-curly-bracket\0" "tilde\0" "DEL\0";
-
-const uint32_t _nl_C_LC_COLLATE_symbol_classes[256] =
-{
- 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7,
- 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 14, 1, 15,
- 1, 16, 1, 17, 1, 18, 1, 19, 1, 20, 1, 21, 1, 22, 1, 23,
- 1, 24, 1, 25, 1, 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 31,
- 1, 32, 1, 33, 1, 34, 1, 35, 1, 36, 1, 37, 1, 38, 1, 39,
- 1, 40, 1, 41, 1, 42, 1, 43, 1, 44, 1, 45, 1, 46, 1, 47,
- 1, 48, 1, 49, 1, 50, 1, 51, 1, 52, 1, 53, 1, 54, 1, 55,
- 1, 56, 1, 57, 1, 58, 1, 59, 1, 60, 1, 61, 1, 62, 1, 63,
- 1, 64, 1, 65, 1, 66, 1, 67, 1, 68, 1, 69, 1, 70, 1, 71,
- 1, 72, 1, 73, 1, 74, 1, 75, 1, 76, 1, 77, 1, 78, 1, 79,
- 1, 80, 1, 81, 1, 82, 1, 83, 1, 84, 1, 85, 1, 86, 1, 87,
- 1, 88, 1, 89, 1, 90, 1, 91, 1, 92, 1, 93, 1, 94, 1, 95,
- 1, 96, 1, 97, 1, 98, 1, 99, 1, 100, 1, 101, 1, 102, 1, 103,
- 1, 104, 1, 105, 1, 106, 1, 107, 1, 108, 1, 109, 1, 110, 1, 111,
- 1, 112, 1, 113, 1, 114, 1, 115, 1, 116, 1, 117, 1, 118, 1, 119,
- 1, 120, 1, 121, 1, 122, 1, 123, 1, 124, 1, 125, 1, 126, 1, 127
-};
-#endif
-
const struct locale_data _nl_C_LC_COLLATE =
{
_nl_C_name,
NULL, 0, 0, /* no file mapped */
UNDELETABLE,
- 13,
+ 16,
{
{ word: 0 },
{ string: NULL },
@@ -164,6 +39,9 @@ const struct locale_data _nl_C_LC_COLLATE =
{ string: NULL },
{ string: NULL },
{ string: NULL },
+ { string: NULL },
+ { word: 0 },
+ { string: NULL },
{ string: NULL }
}
};
diff --git a/locale/Makefile b/locale/Makefile
index a087d7278f..9a34847e99 100644
--- a/locale/Makefile
+++ b/locale/Makefile
@@ -23,7 +23,7 @@ subdir := locale
headers = locale.h langinfo.h xlocale.h
distribute = localeinfo.h categories.def iso-639.def iso-3166.def \
- iso-4217.def weight.h strlen-hash.h \
+ iso-4217.def weight.h strlen-hash.h elem-hash.h \
$(addprefix programs/, \
locale.c localedef.c \
$(localedef-modules:=.c) $(locale-modules:=.c) \
diff --git a/locale/categories.def b/locale/categories.def
index 7ebb8536a5..e055d74a74 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -42,19 +42,22 @@ DEFINE_CATEGORY
(
LC_COLLATE, "LC_COLLATE",
(
- DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE, "collate-hash-size", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS, "collate-hash-layers", std, word)
- DEFINE_ELEMENT (_NL_COLLATE_NAMES, "collate-names", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, string)
- DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word)
+ DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE, "collate-hash-size", std, word)
+ DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS, "collate-hash-layers", std, word)
+ DEFINE_ELEMENT (_NL_COLLATE_NAMES, "collate-names", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
+ DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
), NO_POSTLOAD)
diff --git a/locale/elem-hash.h b/locale/elem-hash.h
new file mode 100644
index 0000000000..0529214e6c
--- /dev/null
+++ b/locale/elem-hash.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 1999 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Ulrich Drepper, <drepper@cygnus.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+
+/* The hashing function used for the table with collation symbols. */
+static inline int32_t
+elem_hash (const char *str, int_fast32_t n)
+{
+ int32_t result = n;
+
+ while (n > 0)
+ {
+ n <<= 3;
+ n += *str++;
+ }
+
+ return result;
+}
diff --git a/locale/langinfo.h b/locale/langinfo.h
index e46fc65184..8ba42fa036 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -243,6 +243,9 @@ enum
_NL_COLLATE_WEIGHTWC,
_NL_COLLATE_EXTRAWC,
_NL_COLLATE_INDIRECTWC,
+ _NL_COLLATE_SYMB_HASH_SIZEMB,
+ _NL_COLLATE_SYMB_TABLEMB,
+ _NL_COLLATE_SYMB_EXTRAMB,
_NL_NUM_LC_COLLATE,
/* LC_CTYPE category: character classification.
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 3eff699e7b..8eb47d7f8e 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -25,12 +25,14 @@
#include <error.h>
#include <stdlib.h>
#include <wchar.h>
+#include <sys/param.h>
#include "charmap.h"
#include "localeinfo.h"
#include "linereader.h"
#include "locfile.h"
#include "localedef.h"
+#include "elem-hash.h"
/* Uncomment the following line in the production version. */
/* #define NDEBUG 1 */
@@ -88,11 +90,13 @@ struct element_t
we changed if necessary but I doubt this is necessary. */
unsigned int used_in_level;
+ struct element_list_t *weights;
+ /* Index in the `weight' table in the output file for the character. */
+ int32_t weights_idx;
+
/* Nonzero if this is a real character definition. */
int is_character;
- struct element_list_t *weights;
-
/* Where does the definition come from. */
const char *file;
size_t line;
@@ -297,6 +301,7 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
/* Will be allocated later. */
newp->weights = NULL;
+ newp->weights_idx = 0;
newp->file = NULL;
newp->line = 0;
@@ -1804,6 +1809,9 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
obstack_grow (pool, buf, len);
}
+ /* Remember the index. */
+ elem->weights_idx = retval;
+
return retval | ((elem->section->ruleidx & 0x7f) << 24);
}
@@ -1866,7 +1874,10 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
uint32_t *names;
uint32_t *tablewc;
size_t table_size;
+ uint32_t elem_size;
+ uint32_t *elem_table;
int i;
+ struct element_t *runp;
data.magic = LIMAGIC (LC_COLLATE);
data.n = nelems;
@@ -2381,6 +2392,110 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
++cnt;
+ /* Finally write the table with collation element names out. It is
+ a hash table with a simple function which gets the name of the
+ character as the input. One character might have many names. The
+ value associated with the name is an index into the weight table
+ where we are then interested in the first-level weight value.
+
+ To determine how large the table should be we are counting the
+ elements have to put in. Since we are using internal chaining
+ using a secondary hash function we have to make the table a bit
+ larger to avoid extremely long search times. We can achieve
+ good results with a 40% larger table than there are entries. */
+ elem_size = 0;
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL && runp->weights != NULL)
+ /* Yep, the element really counts. */
+ ++elem_size;
+
+ runp = runp->next;
+ }
+ /* Add 40% and find the next prime number. */
+ elem_size = MIN (next_prime (elem_size * 1.4), 257);
+
+ /* Allocate the table. Each entry consists of two words: the hash
+ value and an index in a secondary table which provides the index
+ into the weight table and the string itself (so that a match can
+ be determined). */
+ elem_table = (uint32_t *) obstack_alloc (&extrapool,
+ elem_size * 2 * sizeof (uint32_t));
+ memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
+
+ /* Now add the elements. */
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL && runp->weights != NULL)
+ {
+ /* Compute the hash value of the name. */
+ uint32_t namelen = strlen (runp->name);
+ uint32_t hash = elem_hash (runp->name, namelen);
+ size_t idx = hash % elem_size;
+
+ if (elem_table[idx * 2] != 0)
+ {
+ /* The spot is already take. Try iterating using the value
+ from the secondary hashing function. */
+ size_t iter = hash % (elem_size - 2);
+
+ do
+ {
+ idx += iter;
+ if (idx >= elem_size)
+ idx -= elem_size;
+ }
+ while (elem_table[idx * 2] != 0);
+
+ /* This is the spot where we will insert the value. */
+ elem_table[idx * 2] = hash;
+ elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
+
+ /* Now add the index into the weights table. We know the
+ address is always 32bit aligned. */
+ if (sizeof (int) == sizeof (int32_t))
+ obstack_int_grow (&extrapool, runp->weights_idx);
+ else
+ obstack_grow (&extrapool, &runp->weights_idx,
+ sizeof (int32_t));
+
+ /* The the string itself including length. */
+ obstack_1grow (&extrapool, namelen);
+ obstack_grow (&extrapool, runp->name, namelen);
+
+ /* And align again to 32 bits. */
+ if ((1 + namelen) % sizeof (int32_t) != 0)
+ obstack_grow (&extrapool, "\0\0",
+ (sizeof (int32_t)
+ - (1 + namelen) % sizeof (int32_t)));
+ }
+ }
+
+ runp = runp->next;
+ }
+
+ /* Prepare to write out this data. */
+ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
+ iov[2 + cnt].iov_base = &elem_size;
+ iov[2 + cnt].iov_len = sizeof (int32_t);
+ idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+ ++cnt;
+
+ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
+ iov[2 + cnt].iov_base = elem_table;
+ iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
+ idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+ ++cnt;
+
+ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
+ iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
+ iov[2 + cnt].iov_base = obstack_finish (&extrapool);
+ idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+ ++cnt;
+
+
assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
diff --git a/posix/regex.c b/posix/regex.c
index e1a6917f71..a59f5d4a71 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -82,6 +82,7 @@
/* We are also using some library internals. */
# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
# include <langinfo.h>
#endif
@@ -2378,12 +2379,13 @@ regex_compile (pattern, size, syntax, bufp)
had_char_class = false;
}
}
-#ifdef _LIBC
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
{
unsigned char str[MB_LEN_MAX + 1];
+#ifdef _LIBC
uint32_t nrules =
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+#endif
PATFETCH (c);
c1 = 0;
@@ -2412,7 +2414,9 @@ regex_compile (pattern, size, syntax, bufp)
character set and therefore we cannot have character
with more than one byte in the multibyte
representation. */
+#ifdef _LIBC
if (nrules == 0)
+#endif
{
if (c1 != 1)
FREE_STACK_RETURN (REG_ECOLLATE);
@@ -2424,6 +2428,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Set the bit for the character. */
SET_LIST_BIT (str[0]);
}
+#ifdef _LIBC
else
{
/* Try to match the byte sequence in `str' against
@@ -2495,8 +2500,168 @@ regex_compile (pattern, size, syntax, bufp)
}
}
}
+#endif
had_char_class = true;
}
+ }
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
+ {
+ unsigned char str[128]; /* Should be large enough. */
+#ifdef _LIBC
+ uint32_t nrules =
+ _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+#endif
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[='. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == '.' && *p == ']') || p == pend)
+ break;
+ if (c1 < sizeof (str))
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ if (c == '.' && *p == ']' && str[0] != '\0')
+ {
+ /* If we have no collation data we use the default
+ collation in which each character is the name
+ for its own class which contains only the one
+ character. It also means that ASCII is the
+ character set and therefore we cannot have character
+ with more than one byte in the multibyte
+ representation. */
+#ifdef _LIBC
+ if (nrules == 0)
+#endif
+ {
+ if (c1 != 1)
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Set the bit for the character. */
+ SET_LIST_BIT (str[0]);
+ }
+#ifdef _LIBC
+ else
+ {
+ /* Try to match the byte sequence in `str' against
+ those known to the collate implementation.
+ First find out whether the bytes in `str' are
+ actually from exactly one character. */
+ const unsigned char *weights;
+ int32_t table_size;
+ const int32_t *table;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+ int32_t idx;
+ int32_t elem;
+ const unsigned char *cp = str;
+ int32_t weight;
+ int32_t second;
+ int32_t hash;
+ int ch;
+
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ table_size =
+ _NL_CURRENT_WORD (LC_COLLATE,
+ _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+
+ /* Locate the character in the hashing table. */
+ hash = elem_hash (str, c1);
+
+ idx = 0;
+ elem = hash % table_size;
+ second = hash % (table_size - 2);
+ while (symb_table[2 * elem] != 0)
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ && (c1 == extra[symb_table[2 * elem + 1]
+ + sizeof (int32_t)])
+ && memcmp (str,
+ &extra[symb_table[2 * elem + 1]
+ + sizeof (int32_t) + 1],
+ c1) == 0)
+ {
+ /* Yep, this is the entry. */
+ idx = *((int32_t *)
+ (extra
+ + symb_table[2 * elem + 1]));
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+
+ if (symb_table[2 * elem] == 0)
+ /* This is no valid character. */
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ /* Throw away the ] at the end of the equivalence
+ class. */
+ PATFETCH (c);
+
+ /* Now we have to go throught the whole table
+ and find all characters which have the same
+ first level weight.
+
+ XXX Note that this is not entirely correct.
+ we would have to match multibyte sequences
+ but this is not possible with the current
+ implementation. */
+ for (ch = 1; ch < 256; ++ch)
+ /* XXX This test would have to be changed if we
+ would allow matching multibyte sequences. */
+ if (table[ch] > 0)
+ {
+ int32_t idx2 = table[ch];
+ size_t len = weights[idx2];
+
+ /* Test whether the lenghts match. */
+ if (weights[idx] == len)
+ {
+ /* They do. New compare the bytes of
+ the weight. */
+ size_t cnt = 0;
+
+ while (cnt < len
+ && (weights[idx + 1 + cnt]
+ == weights[idx2 + 1 + cnt]))
+ ++len;
+
+ if (cnt == len)
+ /* They match. Mark the character as
+ acceptable. */
+ SET_LIST_BIT (ch);
+ }
+ }
+ }
+#endif
+ had_char_class = false;
+ }
else
{
c1++;
@@ -2507,7 +2672,6 @@ regex_compile (pattern, size, syntax, bufp)
had_char_class = false;
}
}
-#endif
else
{
had_char_class = false;