diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-09-05 02:41:25 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-09-05 02:41:25 +0000 |
commit | b79f74cd622578ce5eea1a3ed5840ac53d6b6d93 (patch) | |
tree | 2c0e56654a4df00616e8994f181434ddf3246549 | |
parent | bcf6d602849db60d9651ffade87f18282c75ebd4 (diff) | |
download | glibc-b79f74cd622578ce5eea1a3ed5840ac53d6b6d93.tar.gz |
Update.
2000-09-03 Bruno Haible <haible@clisp.cons.org>
* charmaps/EUC-TW: Add commented non-reversible mappings.
2000-09-03 Bruno Haible <haible@clisp.cons.org>
* charmaps/CP949: New file.
2000-09-03 Bruno Haible <haible@clisp.cons.org>
* charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..0xFF.
2000-09-03 Bruno Haible <haible@clisp.cons.org>
* charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to
U+007E.
-rw-r--r-- | elf/Makefile | 9 | ||||
-rw-r--r-- | iconv/gconv_builtin.c | 1 | ||||
-rw-r--r-- | iconv/gconv_db.c | 255 | ||||
-rw-r--r-- | iconv/gconv_dl.c | 8 | ||||
-rw-r--r-- | iconv/gconv_int.h | 4 | ||||
-rw-r--r-- | iconv/gconv_simple.c | 17 | ||||
-rw-r--r-- | iconv/gconv_trans.c | 8 | ||||
-rw-r--r-- | iconvdata/EUC-JP.irreversible | 6 | ||||
-rw-r--r-- | iconvdata/ISIRI-3342.irreversible | 52 | ||||
-rw-r--r-- | iconvdata/Makefile | 21 | ||||
-rw-r--r-- | iconvdata/SJIS.irreversible | 7 | ||||
-rw-r--r-- | iconvdata/big5.c | 15 | ||||
-rw-r--r-- | iconvdata/cns11643.h | 25 | ||||
-rw-r--r-- | iconvdata/cns11643l1.c | 5 | ||||
-rw-r--r-- | iconvdata/cns11643l1.h | 6 | ||||
-rw-r--r-- | iconvdata/euc-tw.c | 15 | ||||
-rw-r--r-- | iconvdata/iso_6937-2.c | 2 | ||||
-rw-r--r-- | iconvdata/jis0208.c | 2 | ||||
-rw-r--r-- | iconvdata/jis0212.c | 2 | ||||
-rw-r--r-- | iconvdata/sjis.c | 2 | ||||
-rwxr-xr-x | iconvdata/tst-table-charmap.sh | 35 | ||||
-rw-r--r-- | iconvdata/tst-table-from.c | 225 | ||||
-rw-r--r-- | iconvdata/tst-table-to.c | 107 | ||||
-rwxr-xr-x | iconvdata/tst-table.sh | 75 | ||||
-rwxr-xr-x | iconvdata/tst-tables.sh | 213 | ||||
-rw-r--r-- | linuxthreads/attr.c | 2 | ||||
-rw-r--r-- | localedata/ChangeLog | 17 | ||||
-rw-r--r-- | sysdeps/ia64/Makefile | 4 |
28 files changed, 994 insertions, 146 deletions
diff --git a/elf/Makefile b/elf/Makefile index 9f9fef99bc..ab71adf3c2 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -29,12 +29,15 @@ routines = $(dl-routines) dl-open dl-close dl-symbol dl-support \ dl-routines = $(addprefix dl-,load cache lookup object reloc deps \ runtime error init fini debug misc \ version profile) +all-dl-routines = $(dl-routines) $(sysdep-dl-routines) # But they are absent from the shared libc, because that code is in ld.so. -elide-routines.os = $(dl-routines) dl-support enbl-secure +elide-routines.os = $(all-dl-routines) dl-support enbl-secure # ld.so uses those routines, plus some special stuff for being the program # interpreter and operating independent of libc. rtld-routines := rtld $(dl-routines) dl-sysdep dl-environ dl-minimal +all-rtld-routines = $(rtld-routines) $(sysdep-dl-routines) + distribute := $(rtld-routines:=.c) dynamic-link.h do-rel.h dl-machine.h \ dl-cache.h dl-hash.h soinit.c sofini.c ldd.bash.in \ genrtldtbl.awk atomicity.h dl-procinfo.h ldsodefs.h \ @@ -60,7 +63,7 @@ ld-map = $(common-objpfx)ld.map endif ifeq (yes,$(build-shared)) -extra-objs = $(rtld-routines:=.os) soinit.os sofini.os interp.os +extra-objs = $(all-rtld-routines:%=%.os) soinit.os sofini.os interp.os generated += librtld.os dl-allobjs.os ld.so ldd install-others = $(inst_slibdir)/$(rtld-installed-name) install-bin = ldd @@ -118,7 +121,7 @@ endif # Command to link into a larger single relocatable object. reloc-link = $(LINK.o) -nostdlib -nostartfiles -r -o $@ -$(objpfx)dl-allobjs.os: $(rtld-routines:%=$(objpfx)%.os) +$(objpfx)dl-allobjs.os: $(all-rtld-routines:%=$(objpfx)%.os) $(reloc-link) $^ # Link together the dynamic linker into a single relocatable object. 
diff --git a/iconv/gconv_builtin.c b/iconv/gconv_builtin.c index 7a2072d904..a530c82d46 100644 --- a/iconv/gconv_builtin.c +++ b/iconv/gconv_builtin.c @@ -75,7 +75,6 @@ __gconv_get_builtin_trans (const char *name, struct __gconv_step *step) step->__fct = map[cnt].fct; step->__init_fct = map[cnt].init; step->__end_fct = map[cnt].end; - step->__counter = INT_MAX; step->__shlib_handle = NULL; step->__modname = NULL; diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index ed2698a628..dd51670af1 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -163,7 +163,8 @@ free_derivation (void *p) size_t cnt; for (cnt = 0; cnt < deriv->nsteps; ++cnt) - if (deriv->steps[cnt].__end_fct) + if (deriv->steps[cnt].__counter > 0 + && deriv->steps[cnt].__end_fct != NULL) DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt])); /* Free the name strings. */ @@ -175,6 +176,28 @@ free_derivation (void *p) } +/* Decrement the reference count for a single step in a steps array. */ +static inline void +release_step (struct __gconv_step *step) +{ + if (--step->__counter == 0) + { + /* Call the destructor. */ + if (step->__end_fct != NULL) + DL_CALL_FCT (step->__end_fct, (step)); + +#ifndef STATIC_GCONV + /* Skip builtin modules; they are not reference counted. */ + if (step->__shlib_handle != NULL) + { + /* Release the loaded module. 
*/ + __gconv_release_shlib (step->__shlib_handle); + step->__shlib_handle = NULL; + } +#endif + } +} + static int internal_function gen_steps (struct derivation_step *best, const char *toset, @@ -222,7 +245,6 @@ gen_steps (struct derivation_step *best, const char *toset, result[step_cnt].__shlib_handle = shlib_handle; result[step_cnt].__modname = shlib_handle->name; - result[step_cnt].__counter = 1; result[step_cnt].__fct = shlib_handle->fct; result[step_cnt].__init_fct = shlib_handle->init_fct; result[step_cnt].__end_fct = shlib_handle->end_fct; @@ -233,6 +255,8 @@ gen_steps (struct derivation_step *best, const char *toset, __gconv_get_builtin_trans (current->code->module_name, &result[step_cnt]); + result[step_cnt].__counter = 1; + /* Call the init function. */ result[step_cnt].__data = NULL; if (result[step_cnt].__init_fct != NULL) @@ -245,6 +269,7 @@ gen_steps (struct derivation_step *best, const char *toset, failed = 1; /* Make sure we unload this modules. */ --step_cnt; + result[step_cnt].__end_fct = NULL; break; } } @@ -256,13 +281,7 @@ gen_steps (struct derivation_step *best, const char *toset, { /* Something went wrong while initializing the modules. */ while (++step_cnt < *nsteps) - { - if (result[step_cnt].__end_fct != NULL) - DL_CALL_FCT (result[step_cnt].__end_fct, (&result[step_cnt])); -#ifndef STATIC_GCONV - __gconv_release_shlib (result[step_cnt].__shlib_handle); -#endif - } + release_step (&result[step_cnt]); free (result); *nsteps = 0; *handle = NULL; @@ -292,29 +311,38 @@ increment_counter (struct __gconv_step *steps, size_t nsteps) int result = __GCONV_OK; while (cnt-- > 0) - if (steps[cnt].__counter++ == 0) - { - steps[cnt].__shlib_handle = - __gconv_find_shlib (steps[cnt].__modname); - if (steps[cnt].__shlib_handle == NULL) - { - /* Oops, this is the second time we use this module (after - unloading) and this time loading failed!? 
*/ - while (++cnt < nsteps) - __gconv_release_shlib (steps[cnt].__shlib_handle); - result = __GCONV_NOCONV; - break; - } - - steps[cnt].__init_fct = steps[cnt].__shlib_handle->init_fct; - steps[cnt].__fct = steps[cnt].__shlib_handle->fct; - steps[cnt].__end_fct = steps[cnt].__shlib_handle->end_fct; - - if (steps[cnt].__end_fct != NULL) - DL_CALL_FCT (steps[cnt].__end_fct, (&steps[cnt])); - if (steps[cnt].__init_fct != NULL) - DL_CALL_FCT (steps[cnt].__init_fct, (&steps[cnt])); - } + { + struct __gconv_step *step = &steps[cnt]; + + if (step->__counter++ == 0) + { + /* Skip builtin modules. */ + if (step->__modname != NULL) + { + /* Reopen a previously used module. */ + step->__shlib_handle = __gconv_find_shlib (step->__modname); + if (step->__shlib_handle == NULL) + { + /* Oops, this is the second time we use this module + (after unloading) and this time loading failed!? */ + --step->__counter; + while (++cnt < nsteps) + release_step (&steps[cnt]); + result = __GCONV_NOCONV; + break; + } + + /* The function addresses defined by the module may + have changed. */ + step->__fct = step->__shlib_handle->fct; + step->__init_fct = step->__shlib_handle->init_fct; + step->__end_fct = step->__shlib_handle->end_fct; + } + + if (step->__init_fct != NULL) + DL_CALL_FCT (step->__init_fct, (step)); + } + } return result; } #endif @@ -333,9 +361,8 @@ find_derivation (const char *toset, const char *toset_expand, int best_cost_lo = INT_MAX; int result; - /* There is a small chance that this derivation is meanwhile found. This - can happen if in `find_derivation' we look for this derivation, didn't - find it but at the same time another thread looked for this derivation. */ + /* Look whether an earlier call to `find_derivation' has already + computed a possible derivation. If so, return it immediately. 
*/ result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, handle, nsteps); if (result == __GCONV_OK) @@ -346,9 +373,32 @@ find_derivation (const char *toset, const char *toset_expand, return result; } - /* For now we use a simple algorithm with quadratic runtime behaviour. - The task is to match the `toset' with any of the available rules, - starting from FROMSET. */ + /* The task is to find a sequence of transformations, backed by the + existing modules - whether builtin or dynamically loadable -, + starting at `fromset' (or `fromset_expand') and ending at `toset' + (or `toset_expand'), and with minimal cost. + + For computer scientists, this is a shortest path search in the + graph where the nodes are all possible charsets and the edges are + the transformations listed in __gconv_modules_db. + + For now we use a simple algorithm with quadratic runtime behaviour. + A breadth-first search, starting at `fromset' and `fromset_expand'. + The list starting at `first' contains all nodes that have been + visited up to now, in the order in which they have been visited -- + excluding the goal nodes `toset' and `toset_expand' which get + managed in the list starting at `solution'. + `current' walks through the list starting at `first' and looks + which nodes are reachable from the current node, adding them to + the end of the list [`first' or `solution' respectively] (if + they are visited the first time) or updating them in place (if + they have already been visited). + In each node of either list, cost_lo and cost_hi contain the + minimum cost over any paths found up to now, starting at `fromset' + or `fromset_expand', ending at that node. best_cost_lo and + best_cost_hi represent the minimum over the elements of the + `solution' list. */ + if (fromset_expand != NULL) { first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); @@ -373,16 +423,17 @@ find_derivation (const char *toset, const char *toset_expand, searching for prefixes. 
So we search for the first entry with a matching prefix and any other matching entry can be found from this place. */ - struct gconv_module *node = __gconv_modules_db; + struct gconv_module *node; /* Maybe it is not necessary anymore to look for a solution for - this entry since the cost is already as high (or heigher) as + this entry since the cost is already as high (or higher) as the cost for the best solution so far. */ if (current->cost_hi > best_cost_hi || (current->cost_hi == best_cost_hi && current->cost_lo >= best_cost_lo)) continue; + node = __gconv_modules_db; while (node != NULL) { int cmpres = strcmp (current->result_set, node->from_string); @@ -404,37 +455,52 @@ find_derivation (const char *toset, const char *toset_expand, struct derivation_step *step; /* We managed to find a derivation. First see whether - this is what we are looking for. */ + we have reached one of the goal nodes. */ if (strcmp (result_set, toset) == 0 || (toset_expand != NULL && strcmp (result_set, toset_expand) == 0)) { - if (solution == NULL || cost_hi < best_cost_hi + /* Append to the `solution' list if there + is no entry with this name. */ + for (step = solution; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + step = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + step->next = solution; + solution = step; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `solution' list. */ + step->code = runp; + step->last = current; + step->cost_hi = cost_hi; + step->cost_lo = cost_lo; + } + + /* Update best_cost accordingly. */ + if (cost_hi < best_cost_hi || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) { best_cost_hi = cost_hi; best_cost_lo = cost_lo; } - - /* Append this solution to list. 
*/ - if (solution == NULL) - solution = NEW_STEP (result_set, 0, 0, runp, current); - else - { - while (solution->next != NULL) - solution = solution->next; - - solution->next = NEW_STEP (result_set, 0, 0, - runp, current); - } } else if (cost_hi < best_cost_hi || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) { - /* Append at the end if there is no entry with - this name. */ + /* Append at the end of the `first' list if there + is no entry with this name. */ for (step = first; step != NULL; step = step->next) if (strcmp (result_set, step->result_set) == 0) break; @@ -450,31 +516,36 @@ find_derivation (const char *toset, const char *toset_expand, || (step->cost_hi == cost_hi && step->cost_lo > cost_lo)) { + /* A better path was found for the node, + on the `first' list. */ step->code = runp; step->last = current; /* Update the cost for all steps. */ for (step = first; step != NULL; step = step->next) - { - struct derivation_step *back; - - if (step->code == NULL) - /* This is one of the entries we started - from. */ - continue; - - step->cost_hi = step->code->cost_hi; - step->cost_lo = step->code->cost_lo; - - for (back = step->last; back->code != NULL; - back = back->last) - { - step->cost_hi += back->code->cost_hi; - step->cost_lo += back->code->cost_lo; - } - } - + /* But don't update the start nodes. */ + if (step->code != NULL) + { + struct derivation_step *back; + int hi, lo; + + hi = step->code->cost_hi; + lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + hi += back->code->cost_hi; + lo += back->code->cost_lo; + } + + step->cost_hi = hi; + step->cost_lo = lo; + } + + /* Likewise for the nodes on the solution list. + Also update best_cost accordingly. 
*/ for (step = solution; step != NULL; step = step->next) { @@ -487,7 +558,6 @@ find_derivation (const char *toset, const char *toset_expand, || (step->cost_hi == best_cost_hi && step->cost_lo < best_cost_lo)) { - solution = step; best_cost_hi = step->cost_hi; best_cost_lo = step->cost_lo; } @@ -509,10 +579,26 @@ find_derivation (const char *toset, const char *toset_expand, } if (solution != NULL) - /* We really found a way to do the transformation. Now build a data - structure describing the transformation steps.*/ - result = gen_steps (solution, toset_expand ?: toset, - fromset_expand ?: fromset, handle, nsteps); + { + /* We really found a way to do the transformation. */ + + /* Choose the best solution. This is easy because we know that + the solution list has at most length 2 (one for every possible + goal node). */ + if (solution->next != NULL) + { + struct derivation_step *solution2 = solution->next; + + if (solution2->cost_hi < solution->cost_hi + || (solution2->cost_hi == solution->cost_hi + && solution2->cost_lo < solution->cost_lo)) + solution = solution2; + } + + /* Now build a data structure describing the transformation steps. */ + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); + } else { /* We haven't found a transformation. Clear the result values. */ @@ -609,14 +695,7 @@ __gconv_close_transform (struct __gconv_step *steps, size_t nsteps) __libc_lock_lock (lock); while (nsteps-- > 0) - if (steps[nsteps].__shlib_handle != NULL - && --steps[nsteps].__counter == 0) - { - result = __gconv_release_shlib (steps[nsteps].__shlib_handle); - if (result != __GCONV_OK) - break; - steps[nsteps].__shlib_handle = NULL; - } + release_step (&steps[nsteps]); /* Release the lock. 
*/ __libc_lock_unlock (lock); diff --git a/iconv/gconv_dl.c b/iconv/gconv_dl.c index d07f84e31b..308db52c83 100644 --- a/iconv/gconv_dl.c +++ b/iconv/gconv_dl.c @@ -164,7 +164,9 @@ do_release_shlib (const void *nodep, VISIT value, int level) } else if (obj->counter <= 0) { - if (--obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) + if (obj->counter >= -TRIES_BEFORE_UNLOAD) + --obj->counter; + if (obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) { /* Unload the shared object. */ __libc_dlclose (obj->handle); @@ -175,7 +177,7 @@ do_release_shlib (const void *nodep, VISIT value, int level) /* Notify system that a shared object is not longer needed. */ -int +void internal_function __gconv_release_shlib (struct __gconv_loaded_object *handle) { @@ -186,8 +188,6 @@ __gconv_release_shlib (struct __gconv_loaded_object *handle) with release counts <= 0. This way we can finally unload them if necessary. */ __twalk (loaded, do_release_shlib); - - return __GCONV_OK; } diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 8333a215c7..ad2d6e7d4a 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -55,7 +55,7 @@ struct gconv_alias object is also handled. */ struct __gconv_loaded_object { - /* Name of the object. */ + /* Name of the object. It must be the first structure element. */ const char *name; /* Reference counter for the db functionality. If no conversion is @@ -201,7 +201,7 @@ extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name) /* Release shared object. If no further reference is available unload the object. */ -extern int __gconv_release_shlib (struct __gconv_loaded_object *handle) +extern void __gconv_release_shlib (struct __gconv_loaded_object *handle) internal_function; /* Fill STEP with information about builtin module with NAME. 
*/ diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 9376b151ed..a41e1b50b2 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -34,19 +34,6 @@ #endif -/* These are definitions used by some of the functions for handling - UTF-8 encoding below. */ -static const uint32_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - - /* Transform from the internal, UCS4-like format, to UCS4. The difference between the internal ucs4 format and the real UCS4 format is, if any, the endianess. The Unicode/ISO 10646 says that @@ -856,7 +843,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, char *start; \ \ for (step = 2; step < 6; ++step) \ - if ((wc & encoding_mask[step - 2]) == 0) \ + if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ break; \ \ if (__builtin_expect (outptr + step > outend, 0)) \ @@ -867,7 +854,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, } \ \ start = outptr; \ - *outptr = encoding_byte[step - 2]; \ + *outptr = (unsigned char) (~0xff >> step); \ outptr += step; \ --step; \ do \ diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c index 8c658b126f..4a42a35afd 100644 --- a/iconv/gconv_trans.c +++ b/iconv/gconv_trans.c @@ -330,6 +330,7 @@ __gconv_translit_find (struct trans_struct *trans) { /* Copy the data. */ *trans = (*found)->info; + (*found)->open_count++; res = 0; } } @@ -345,7 +346,7 @@ __gconv_translit_find (struct trans_struct *trans) __gconv_get_path (); /* See whether we have to append .so. */ - if (name_len <= 3 || memcmp (&trans->name[name_len - 3], ".so", 3) != 0) + if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0) need_so = 1; /* Create a new entry. */ @@ -366,7 +367,7 @@ __gconv_translit_find (struct trans_struct *trans) newp->fname = cp; - /* Seach in all the directories. */ + /* Search in all the directories. 
*/ for (runp = __gconv_path_elem; runp->name != NULL; ++runp) { cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name), @@ -382,6 +383,9 @@ __gconv_translit_find (struct trans_struct *trans) } } + if (res) + newp->fname = NULL; + /* In any case we'll add the entry to our search tree. */ if (__tsearch (newp, &search_tree, trans_compare) == NULL) { diff --git a/iconvdata/EUC-JP.irreversible b/iconvdata/EUC-JP.irreversible new file mode 100644 index 0000000000..e4c0ed5736 --- /dev/null +++ b/iconvdata/EUC-JP.irreversible @@ -0,0 +1,6 @@ +0x5C 0x00A5 +0x7E 0x203E +0x8FA2B7 0x007E +0x8FA2B7 0xFF5E +0xA1C0 0x005C +0xA1C0 0xFF3C diff --git a/iconvdata/ISIRI-3342.irreversible b/iconvdata/ISIRI-3342.irreversible new file mode 100644 index 0000000000..131393baa0 --- /dev/null +++ b/iconvdata/ISIRI-3342.irreversible @@ -0,0 +1,52 @@ +0x80 0x0000 +0x81 0x0001 +0x82 0x0002 +0x83 0x0003 +0x84 0x0004 +0x85 0x0005 +0x86 0x0006 +0x87 0x0007 +0x88 0x0008 +0x89 0x0009 +0x8A 0x000A +0x8B 0x000B +0x8C 0x000C +0x8D 0x000D +0x8E 0x000E +0x8F 0x000F +0x90 0x0010 +0x91 0x0011 +0x92 0x0012 +0x93 0x0013 +0x94 0x0014 +0x95 0x0015 +0x96 0x0016 +0x97 0x0017 +0x98 0x0018 +0x99 0x0019 +0x9A 0x001A +0x9B 0x001B +0x9C 0x001C +0x9D 0x001D +0x9E 0x001E +0x9F 0x001F +0xA0 0x0020 +0xA3 0x0021 +0xA6 0x002E +0xA8 0x0029 +0xA9 0x0028 +0xAB 0x002B +0xAD 0x002D +0xAF 0x002F +0xBA 0x003A +0xBC 0x003C +0xBD 0x003D +0xBE 0x003E +0xE2 0x005D +0xE3 0x005B +0xE4 0x007D +0xE5 0x007B +0xE8 0x002A +0xEA 0x007C +0xEB 0x005C +0xFF 0x007F diff --git a/iconvdata/Makefile b/iconvdata/Makefile index eaeeb411c2..687ca4e156 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -51,6 +51,8 @@ modules.so := $(addsuffix .so, $(modules)) tests = bug-iconv1 bug-iconv2 tst-loading +test-srcs := tst-table-from tst-table-to + include ../Makeconfig libJIS-routines := jis0201 jis0208 jis0212 @@ -89,7 +91,10 @@ LDFLAGS-libISOIR165.so = -Wl,-soname,$(@F) distribute := gconv-modules extra-module.mk gap.awk gaptab.awk \ 
gen-8bit.sh gen-8bit-gap.sh gen-8bit-gap-1.sh \ TESTS $(filter-out testdata/CVS%, $(wildcard testdata/*)) \ - run-iconv-test.sh 8bit-generic.c 8bit-gap.c \ + run-iconv-test.sh tst-tables.sh tst-table.sh \ + tst-table-charmap.sh tst-table-from.c tst-table-to.c \ + EUC-JP.irreversible ISIRI-3342.irreversible SJIS.irreversible \ + 8bit-generic.c 8bit-gap.c \ ansi_x3.110.c asmo_449.c big5.c cp737.c cp737.h \ cp775.c cp775.h ibm874.c cns11643.c cns11643.h \ cns11643l1.c cns11643l1.h cp1250.c cp1251.c cp1252.c cp1253.c \ @@ -244,7 +249,8 @@ include ../Rules ifeq (no,$(cross-compiling)) ifeq (yes,$(build-shared)) -tests: $(objpfx)iconv-test.out $(objpfx)mtrace-tst-loading +tests: $(objpfx)iconv-test.out $(objpfx)tst-tables.out \ + $(objpfx)mtrace-tst-loading endif endif @@ -259,6 +265,17 @@ $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(common-objdir)/iconv/iconv_prog TESTS $(SHELL) -e $< $(common-objdir) > $@ +$(objpfx)tst-tables.out: tst-tables.sh $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) \ + $(objpfx)tst-table-from $(objpfx)tst-table-to + $(SHELL) $< $(common-objpfx) $(common-objpfx)iconvdata/ > $@ + +do-tests-clean common-mostlyclean: tst-tables-clean + +.PHONY: tst-tables-clean +tst-tables-clean: + -rm -f $(objpfx)tst-*.table $(objpfx)tst-EUC-TW.irreversible + ifdef objpfx $(objpfx)gconv-modules: gconv-modules cp $^ $@ diff --git a/iconvdata/SJIS.irreversible b/iconvdata/SJIS.irreversible new file mode 100644 index 0000000000..dfc678a1c6 --- /dev/null +++ b/iconvdata/SJIS.irreversible @@ -0,0 +1,7 @@ +0x5C 0x005C +0x7E 0x007E +0x815F 0x005C +0x815F 0xFF3C +0x8191 0xFFE0 +0x8192 0xFFE1 +0x81CA 0xFFE2 diff --git a/iconvdata/big5.c b/iconvdata/big5.c index 9cf5a5f6f4..8748036724 100644 --- a/iconvdata/big5.c +++ b/iconvdata/big5.c @@ -8644,8 +8644,21 @@ static const char from_ucs4_tab15[][2] = \ inptr += 2; \ } \ - else \ + else if (ch <= 0x80) \ ++inptr; \ + else \ + { \ + /* This is illegal. */ \ + if (! 
ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++inptr; \ + ++*irreversible; \ + continue; \ + } \ \ put32 (outptr, ch); \ outptr += 4; \ diff --git a/iconvdata/cns11643.h b/iconvdata/cns11643.h index 27c484bbd2..b57aa9decb 100644 --- a/iconvdata/cns11643.h +++ b/iconvdata/cns11643.h @@ -1,5 +1,5 @@ /* Access functions for CNS 11643, plane 2 handling. - Copyright (C) 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -50,19 +50,19 @@ cns11643_to_ucs4 (const char **s, size_t avail, unsigned char offset) idx = (ch2 - 0x21 - offset) * 94 + (ch3 - 0x21 - offset); - if ((ch - 0x21 - offset) == 1) + if ((ch - 0x20 - offset) == 1) { - if (idx > 0x2196) + if (idx > 0x21f2) return __UNKNOWN_10646_CHAR; result = __cns11643l1_to_ucs4_tab[idx]; } - else if ((ch - 0x21 - offset) == 2) + else if ((ch - 0x20 - offset) == 2) { if (idx > 0x1de1) return __UNKNOWN_10646_CHAR; result = __cns11643l2_to_ucs4_tab[idx]; } - else if ((ch - 0x21 - offset) == 0xe) + else if ((ch - 0x20 - offset) == 0xe) { if (idx > 0x19bd) return __UNKNOWN_10646_CHAR; @@ -166,7 +166,7 @@ ucs4_to_cns11643 (uint32_t wch, char *s, size_t avail) break; case 0x3105 ... 0x3129: buf[0] = '\x25'; - buf[1] = '\x26' + (ch - 0x3105); + buf[1] = '\x47' + (ch - 0x3105); break; case 0x32a3: cp = "\x22\x21"; @@ -177,12 +177,13 @@ ucs4_to_cns11643 (uint32_t wch, char *s, size_t avail) case 0x4e00 ... 0x9f9c: cp = __cns11643l1_from_ucs4_tab12[ch - 0x4e00]; - if (cp[0] == '\0') - { - /* Let's try the other planes. */ - needed = 3; - cp = __cns11643_from_ucs4_tab[ch - 0x4e00]; - } + if (cp[0] != '\0') + break; + /* FALLTHROUGH. Let's try the other planes. */ + case 0x9f9d ... 0x9fa5: + /* Let's try the other planes. */ + needed = 3; + cp = __cns11643_from_ucs4_tab[ch - 0x4e00]; break; case 0xfe30 ... 
0xfe6b: cp = __cns11643l1_from_ucs4_tab13[ch - 0xfe30]; diff --git a/iconvdata/cns11643l1.c b/iconvdata/cns11643l1.c index d106b3d34c..2372d64a4f 100644 --- a/iconvdata/cns11643l1.c +++ b/iconvdata/cns11643l1.c @@ -1,5 +1,5 @@ /* Mapping tables for CNS 11643, plane 1 handling. - Copyright (C) 1998 Free Software Foundation, Inc. + Copyright (C) 1998, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -1637,7 +1637,8 @@ const char __cns11643l1_from_ucs4_tab6[][2] = [0x000a] = "\x22\x49", [0x000b] = "\x22\x48", [0x0014] = "\x22\x45", [0x0015] = "\x22\x46", [0x0016] = "\x22\x4d", [0x0019] = "\x22\x4e", [0x001f] = "\x22\x50", [0x0020] = "\x22\x4f", [0x0027] = "\x22\x44", - [0x003d] = "\x22\x3d", + [0x003d] = "\x22\x3d", [0x004b] = "\x22\x3b", [0x004c] = "\x22\x3e", + [0x0051] = "\x22\x39", [0x0052] = "\x22\x3a", }; /* Graphic pictures for control codes. The table can be created using diff --git a/iconvdata/cns11643l1.h b/iconvdata/cns11643l1.h index 1aa5918f91..6072c8340b 100644 --- a/iconvdata/cns11643l1.h +++ b/iconvdata/cns11643l1.h @@ -1,5 +1,5 @@ /* Access functions for CNS 11643, plane 1 handling. - Copyright (C) 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -44,7 +44,7 @@ cns11643l1_to_ucs4 (const unsigned char **s, size_t avail, return __UNKNOWN_10646_CHAR; idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset); - if (idx > 0x2196) + if (idx > 0x21f2) return __UNKNOWN_10646_CHAR; (*s) += 2; @@ -137,7 +137,7 @@ ucs4_to_cns11643l1 (uint32_t wch, unsigned char *s, size_t avail) break; case 0x3105 ... 
0x3129: buf[0] = '\x25'; - buf[1] = '\x26' + (ch - 0x3105); + buf[1] = '\x47' + (ch - 0x3105); break; case 0x32a3: cp = "\x22\x21"; diff --git a/iconvdata/euc-tw.c b/iconvdata/euc-tw.c index ed7a197d7c..39c2f1f7da 100644 --- a/iconvdata/euc-tw.c +++ b/iconvdata/euc-tw.c @@ -65,7 +65,7 @@ character is also available. */ \ uint32_t ch2; \ \ - if (inptr + (ch == 0x8e ? 3 : 1) >= inend) \ + if (inptr + 1 >= inend) \ { \ /* The second character is not available. Store the \ intermediate result. */ \ @@ -73,7 +73,7 @@ break; \ } \ \ - ch2 = *inptr; \ + ch2 = *(inptr + 1); \ \ /* All second bytes of a multibyte character must be >= 0xa1. */ \ if (ch2 < 0xa1 || ch2 == 0xff) \ @@ -96,8 +96,15 @@ const char *endp = inptr + 1; \ \ ch = cns11643_to_ucs4 (&endp, inend - inptr - 1, 0x80); \ - /* Please note that we need not test for the missing input \ - characters here anymore. */ \ + \ + if (ch == 0) \ + { \ + /* The third or fourth character is not available. Store \ + the intermediate result. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ if (ch == __UNKNOWN_10646_CHAR) \ { \ /* Illegal input. 
*/ \ diff --git a/iconvdata/iso_6937-2.c b/iconvdata/iso_6937-2.c index dda8acd53f..01c0c5857b 100644 --- a/iconvdata/iso_6937-2.c +++ b/iconvdata/iso_6937-2.c @@ -46,7 +46,7 @@ static const uint32_t to_ucs4[256] = /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0xa0 */ 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x0024, 0x00a5, 0x0000, 0x00a7, /* 0xa8 */ 0x0000, 0x2018, 0x201c, 0x00ab, 0x2190, 0x2191, 0x2192, 0x2193, - /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00d7, 0x00b5, 0x00b6, 0x00b7, /* 0xb8 */ 0x00f7, 0x2019, 0x201d, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, diff --git a/iconvdata/jis0208.c b/iconvdata/jis0208.c index 06558f6983..bd92e1229b 100644 --- a/iconvdata/jis0208.c +++ b/iconvdata/jis0208.c @@ -67,7 +67,7 @@ const uint16_t __jis0208_to_ucs[0x1e80] = [0x0010] = 0xffe3, [0x0011] = 0xff3f, [0x0012] = 0x30fd, [0x0013] = 0x30fe, [0x0014] = 0x309d, [0x0015] = 0x309e, [0x0016] = 0x3003, [0x0017] = 0x4edd, [0x0018] = 0x3005, [0x0019] = 0x3006, [0x001a] = 0x3007, [0x001b] = 0x30fc, - [0x001c] = 0x2015, [0x001d] = 0x2010, [0x001e] = 0xff0f, [0x001f] = 0xff3c, + [0x001c] = 0x2015, [0x001d] = 0x2010, [0x001e] = 0xff0f, [0x001f] = 0x005c, [0x0020] = 0x301c, [0x0021] = 0x2016, [0x0022] = 0xff5c, [0x0023] = 0x2026, [0x0024] = 0x2025, [0x0025] = 0x2018, [0x0026] = 0x2019, [0x0027] = 0x201c, [0x0028] = 0x201d, [0x0029] = 0xff08, [0x002a] = 0xff09, [0x002b] = 0x3014, diff --git a/iconvdata/jis0212.c b/iconvdata/jis0212.c index 36857b653e..b8baaa3652 100644 --- a/iconvdata/jis0212.c +++ b/iconvdata/jis0212.c @@ -111,7 +111,7 @@ const struct jisx0212_idx __jisx0212_to_ucs_idx[] = const uint16_t __jisx0212_to_ucs[] = { 0x02d8, 0x02c7, 0x00b8, 0x02d9, 0x02dd, 0x00af, 0x02db, 0x02da, - 0xff5e, 0x0384, 0x0385, 0x00a1, 0x00a6, 
0x00bf, 0x00ba, 0x00aa, + 0x007e, 0x0384, 0x0385, 0x00a1, 0x00a6, 0x00bf, 0x00ba, 0x00aa, 0x00a9, 0x00ae, 0x2122, 0x00a4, 0x2116, 0x0386, 0x0388, 0x0389, 0x038a, 0x03aa, 000000, 0x038c, 000000, 0x038e, 0x03ab, 000000, 0x038f, 000000, 000000, 000000, 000000, 0x03ac, 0x03ad, 0x03ae, diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c index 7bbfef3e5d..33a20561ed 100644 --- a/iconvdata/sjis.c +++ b/iconvdata/sjis.c @@ -67,7 +67,7 @@ static const uint16_t cjk_block1[703] = [ 16] = 0xFFE3, [ 17] = 0xFF3F, [ 18] = 0x30FD, [ 19] = 0x30FE, [ 20] = 0x309D, [ 21] = 0x309E, [ 22] = 0x3003, [ 23] = 0x4EDD, [ 24] = 0x3005, [ 25] = 0x3006, [ 26] = 0x3007, [ 27] = 0x30FC, - [ 28] = 0x2015, [ 29] = 0x2010, [ 30] = 0xFF0F, [ 31] = 0xFF3C, + [ 28] = 0x2015, [ 29] = 0x2010, [ 30] = 0xFF0F, [ 31] = 0x005C, [ 32] = 0x301C, [ 33] = 0x2016, [ 34] = 0xFF5C, [ 35] = 0x2026, [ 36] = 0x2025, [ 37] = 0x2018, [ 38] = 0x2019, [ 39] = 0x201C, [ 40] = 0x201D, [ 41] = 0xFF08, [ 42] = 0xFF09, [ 43] = 0x3014, diff --git a/iconvdata/tst-table-charmap.sh b/iconvdata/tst-table-charmap.sh new file mode 100755 index 0000000000..99c713c363 --- /dev/null +++ b/iconvdata/tst-table-charmap.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# Copyright (C) 2000 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. 
+# +# You should have received a copy of the GNU Library General Public +# License along with the GNU C Library; see the file COPYING.LIB. If not, +# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# Converts a glibc format charmap to a simple format .table file. + +LC_ALL=C +export LC_ALL + +case "$1" in + POSIX ) + # Old POSIX/DKUUG borrowed format + grep '^<.*>.*/x[0-9A-Fa-f]*[ ]*<U....>.*$' | grep -v 'not a real character' | sed -e 's,^<.*>[ ]*\([/x0-9A-Fa-f]*\)[ ]*<U\(....\)>.*$,\1 0x\2,' | tr abcdef ABCDEF | sed -e 's,/x\([0-9A-F][0-9A-F]\),\1,g' | sed -e 's,^,0x,' | sort | uniq | grep -v '^0x00 0x\([1-9A-F]...\|.[1-9A-F]..\|..[1-9A-F].\|...[1-9A-F]\)' + ;; + *) + # New Unicode based format + sed -e 's,^%IRREVERSIBLE%,,' | grep '^<U....>[ ]*/x' | grep -v 'not a real character' | sed -e 's,<U\(....\)>[ ]*\([/x0-9A-Fa-f]*\).*$,\2 0x\1,' | tr abcdef ABCDEF | sed -e 's,/x\([0-9A-F][0-9A-F]\),\1,g' | sed -e 's,^,0x,' | sort | uniq | grep -v '^0x00 0x\([1-9A-F]...\|.[1-9A-F]..\|..[1-9A-F].\|...[1-9A-F]\)' + ;; +esac diff --git a/iconvdata/tst-table-from.c b/iconvdata/tst-table-from.c new file mode 100644 index 0000000000..bd2647eae0 --- /dev/null +++ b/iconvdata/tst-table-from.c @@ -0,0 +1,225 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Create a table from CHARSET to Unicode. + This is a good test for CHARSET's iconv() module, in particular the + FROM_LOOP BODY macro. */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <iconv.h> +#include <errno.h> + +/* Converts a byte buffer to a hexadecimal string. */ +static const char* +hexbuf (unsigned char buf[], unsigned int buflen) +{ + static char msg[50]; + + switch (buflen) + { + case 1: + sprintf (msg, "0x%02X", buf[0]); + break; + case 2: + sprintf (msg, "0x%02X%02X", buf[0], buf[1]); + break; + case 3: + sprintf (msg, "0x%02X%02X%02X", buf[0], buf[1], buf[2]); + break; + case 4: + sprintf (msg, "0x%02X%02X%02X%02X", buf[0], buf[1], buf[2], buf[3]); + break; + default: + abort (); + } + return msg; +} + +/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (6 bytes) + using the conversion descriptor CD. Returns the number of written bytes, + or 0 if ambiguous, or -1 if invalid. 
*/ +static int +try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out) +{ + const char *inbuf = (const char *) buf; + size_t inbytesleft = buflen; + char *outbuf = (char *) out; + size_t outbytesleft = 6; + size_t result = iconv (cd, + (char *) &inbuf, &inbytesleft, + &outbuf, &outbytesleft); + if (result == (size_t)(-1)) + { + if (errno == EILSEQ) + { + return -1; + } + else if (errno == EINVAL) + { + return 0; + } + else + { + int saved_errno = errno; + fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen)); + errno = saved_errno; + perror (""); + exit (1); + } + } + else + { + if (inbytesleft != 0) + { + fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n", + hexbuf (buf, buflen), + (long) (buflen - inbytesleft), + (long) (6 - outbytesleft)); + exit (1); + } + return 6 - outbytesleft; + } +} + +/* Returns the out[] buffer as a Unicode value. */ +static unsigned int +utf8_decode (const unsigned char *out, unsigned int outlen) +{ + return (outlen==1 ? out[0] : + outlen==2 ? ((out[0] & 0x1f) << 6) + (out[1] & 0x3f) : + outlen==3 ? ((out[0] & 0x0f) << 12) + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f) : + outlen==4 ? ((out[0] & 0x07) << 18) + ((out[1] & 0x3f) << 12) + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f) : + outlen==5 ? ((out[0] & 0x03) << 24) + ((out[1] & 0x3f) << 18) + ((out[2] & 0x3f) << 12) + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f) : + outlen==6 ? 
((out[0] & 0x01) << 30) + ((out[1] & 0x3f) << 24) + ((out[2] & 0x3f) << 18) + ((out[3] & 0x3f) << 12) + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f) : + 0xfffd); +} + +int +main (int argc, char *argv[]) +{ + const char *charset; + iconv_t cd; + + if (argc != 2) + { + fprintf (stderr, "Usage: tst-table-to charset\n"); + exit (1); + } + charset = argv[1]; + + cd = iconv_open ("UTF-8", charset); + if (cd == (iconv_t)(-1)) + { + perror ("iconv_open"); + exit (1); + } + + { + unsigned char out[6]; + unsigned char buf[4]; + unsigned int i0, i1, i2, i3; + int result; + + for (i0 = 0; i0 < 0x100; i0++) + { + buf[0] = i0; + result = try (cd, buf, 1, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X\t0x%04X\n", + i0, utf8_decode (out, result)); + } + else + { + for (i1 = 0; i1 < 0x100; i1++) + { + buf[1] = i1; + result = try (cd, buf, 2, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X\t0x%04X\n", + i0, i1, utf8_decode (out, result)); + } + else + { + for (i2 = 0; i2 < 0x100; i2++) + { + buf[2] = i2; + result = try (cd, buf, 3, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X%02X\t0x%04X\n", + i0, i1, i2, utf8_decode (out, result)); + } + else if (strcmp (charset, "UTF-8")) + { + for (i3 = 0; i3 < 0x100; i3++) + { + buf[3] = i3; + result = try (cd, buf, 4, out); + if (result < 0) + { + } + else if (result > 0) + { + printf ("0x%02X%02X%02X%02X\t0x%04X\n", + i0, i1, i2, i3, + utf8_decode (out, result)); + } + else + { + fprintf (stderr, + "%s: incomplete byte sequence\n", + hexbuf (buf, 4)); + exit (1); + } + } + } + } + } + } + } + } + } + + if (iconv_close (cd) < 0) + { + perror ("iconv_close"); + exit (1); + } + + if (ferror (stdin) || ferror (stdout)) + { + fprintf (stderr, "I/O error\n"); + exit (1); + } + + exit (0); +} diff --git a/iconvdata/tst-table-to.c b/iconvdata/tst-table-to.c new file mode 100644 index 0000000000..b725f1f52e --- /dev/null +++ b/iconvdata/tst-table-to.c @@ -0,0 
+1,107 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Create a table from Unicode to CHARSET. + This is a good test for CHARSET's iconv() module, in particular the + TO_LOOP BODY macro. 
*/ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <iconv.h> +#include <errno.h> + +int +main (int argc, char *argv[]) +{ + const char *charset; + iconv_t cd; + + if (argc != 2) + { + fprintf (stderr, "Usage: tst-table-to charset\n"); + exit (1); + } + charset = argv[1]; + + cd = iconv_open (charset, "UCS-2"); + if (cd == (iconv_t)(-1)) + { + perror ("iconv_open"); + exit (1); + } + + { + unsigned int i; + unsigned char buf[10]; + + for (i = 0; i < 0x10000; i++) + { + unsigned short in = i; + const char *inbuf = (const char *) &in; + size_t inbytesleft = sizeof (unsigned short); + char *outbuf = (char *) buf; + size_t outbytesleft = sizeof (buf); + size_t result = iconv (cd, + (char *) &inbuf, &inbytesleft, + &outbuf, &outbytesleft); + if (result == (size_t)(-1)) + { + if (errno != EILSEQ) + { + int saved_errno = errno; + fprintf (stderr, "0x%02X: iconv error: ", i); + errno = saved_errno; + perror (""); + exit (1); + } + } + else if (result == 0) /* ignore conversions with transliteration */ + { + unsigned int j, jmax; + if (inbytesleft != 0 || outbytesleft == sizeof (buf)) + { + fprintf (stderr, "0x%02X: inbytes = %ld, outbytes = %ld\n", i, + (long) (sizeof (unsigned short) - inbytesleft), + (long) (sizeof (buf) - outbytesleft)); + exit (1); + } + jmax = sizeof (buf) - outbytesleft; + printf ("0x"); + for (j = 0; j < jmax; j++) + printf ("%02X", buf[j]); + printf ("\t0x%04X\n", i); + } + } + } + + if (iconv_close (cd) < 0) + { + perror ("iconv_close"); + exit (1); + } + + if (ferror (stdin) || ferror (stdout)) + { + fprintf (stderr, "I/O error\n"); + exit (1); + } + + exit (0); +} diff --git a/iconvdata/tst-table.sh b/iconvdata/tst-table.sh new file mode 100755 index 0000000000..4cd2f6e29b --- /dev/null +++ b/iconvdata/tst-table.sh @@ -0,0 +1,75 @@ +#!/bin/sh +# Copyright (C) 2000 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. 
+# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with the GNU C Library; see the file COPYING.LIB. If not, +# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# Checks that the iconv() implementation (in both directions) for a +# stateless encoding agrees with the charmap table. + +common_objpfx=$1 +objpfx=$2 +charset=$3 +charmap=$4 + +GCONV_PATH=${common_objpfx}iconvdata +export GCONV_PATH +LC_ALL=C +export LC_ALL + +set -e + +# Get the charmap. +./tst-table-charmap.sh ${charmap:-$charset} \ + < ../localedata/charmaps/${charmap:-$charset} \ + > ${objpfx}tst-${charset}.charmap.table + +# Precompute expected differences between the two iconv directions. +if test ${charset} = EUC-TW; then + irreversible=${objpfx}tst-${charset}.irreversible + grep '^0x8EA1' ${objpfx}tst-${charset}.charmap.table > ${irreversible} +else + irreversible=${charset}.irreversible +fi + +# iconv in one direction. +${common_objpfx}elf/ld.so --library-path $common_objpfx \ +${objpfx}tst-table-from ${charset} \ + > ${objpfx}tst-${charset}.table + +# iconv in the other direction. +${common_objpfx}elf/ld.so --library-path $common_objpfx \ +${objpfx}tst-table-to ${charset} | sort \ + > ${objpfx}tst-${charset}.inverse.table + +# Difference between the two iconv directions. +diff ${objpfx}tst-${charset}.table ${objpfx}tst-${charset}.inverse.table | \ + grep '^[<>]' | sed -e 's,^. 
,,' > ${objpfx}tst-${charset}.irreversible.table + +# Check 1: charmap and iconv forward should be identical. +cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.table + +# Check 2: the difference between the two iconv directions. +if test -f ${irreversible}; then + cat ${objpfx}tst-${charset}.charmap.table ${irreversible} | sort | uniq -u \ + > ${objpfx}tst-${charset}.tmp.table + cmp -s ${objpfx}tst-${charset}.tmp.table ${objpfx}tst-${charset}.inverse.table +else + cmp -s ${objpfx}tst-${charset}.table ${objpfx}tst-${charset}.inverse.table +fi + +exit 0 diff --git a/iconvdata/tst-tables.sh b/iconvdata/tst-tables.sh new file mode 100755 index 0000000000..8d2735a12e --- /dev/null +++ b/iconvdata/tst-tables.sh @@ -0,0 +1,213 @@ +#!/bin/sh +# Copyright (C) 2000 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with the GNU C Library; see the file COPYING.LIB. If not, +# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# Checks that the iconv() implementation (in both directions) for the +# stateless encodings agrees with the corresponding charmap table. + +common_objpfx=$1 +objpfx=$2 + +status=0 + +cat <<EOF | + # Single-byte and other "small" encodings come here. 
+ # Keep this list in the same order as gconv-modules. + # + # charset name table name comment + ASCII ANSI_X3.4-1968 + ISO646-GB BS_4730 + ISO646-CA CSA_Z243.4-1985-1 + ISO646-CA2 CSA_Z243.4-1985-2 + ISO646-DE DIN_66003 + ISO646-DK DS_2089 + ISO646-ES ES + ISO646-ES2 ES2 + ISO646-CN GB_1988-80 + ISO646-IT IT + ISO646-JP JIS_C6220-1969-RO + ISO646-JP-OCR-B JIS_C6229-1984-B + ISO646-YU JUS_I.B1.002 + ISO646-KR KSC5636 + ISO646-HU MSZ_7795.3 + ISO646-CU NC_NC00-10 + ISO646-FR NF_Z_62-010 + ISO646-FR1 NF_Z_62-010_1973 + ISO646-NO NS_4551-1 + ISO646-NO2 NS_4551-2 + ISO646-PT PT + ISO646-PT2 PT2 + ISO646-SE SEN_850200_B + ISO646-SE2 SEN_850200_C + ISO-8859-1 + ISO-8859-2 + ISO-8859-3 + ISO-8859-4 + ISO-8859-5 + ISO-8859-6 + ISO-8859-7 + ISO-8859-8 + ISO-8859-9 + ISO-8859-10 + #ISO-8859-11 No corresponding table, nonstandard + ISO-8859-13 + ISO-8859-14 + ISO-8859-15 + ISO-8859-16 + T.61-8BIT + ISO_6937 + #ISO_6937-2 ISO-IR-90 Handling of combining marks is broken + KOI-8 + KOI8-R + LATIN-GREEK + LATIN-GREEK-1 + HP-ROMAN8 + EBCDIC-AT-DE + EBCDIC-AT-DE-A + EBCDIC-CA-FR + EBCDIC-DK-NO + EBCDIC-DK-NO-A + EBCDIC-ES + EBCDIC-ES-A + EBCDIC-ES-S + EBCDIC-FI-SE + EBCDIC-FI-SE-A + EBCDIC-FR + EBCDIC-IS-FRISS + EBCDIC-IT + EBCDIC-PT + EBCDIC-UK + EBCDIC-US + IBM037 + IBM038 + IBM256 + IBM273 + IBM274 + IBM275 + IBM277 + IBM278 + IBM280 + IBM281 + IBM284 + IBM285 + IBM290 + IBM297 + IBM420 + IBM423 + IBM424 + IBM437 + IBM500 + IBM850 + IBM851 + IBM852 + IBM855 + IBM857 + IBM860 + IBM861 + IBM862 + IBM863 + IBM864 + IBM865 + IBM866 + IBM868 + IBM869 + IBM870 + IBM871 + IBM875 + IBM880 + IBM891 + IBM903 + IBM904 + IBM905 + IBM918 + IBM1004 + IBM1026 + IBM1047 + CP1250 + CP1251 + CP1252 + CP1253 + CP1254 + CP1255 + CP1256 + CP1257 + CP1258 + IBM874 + CP737 + CP775 + MACINTOSH + IEC_P27-1 + ASMO_449 + ISO-IR-99 ANSI_X3.110-1983 + ISO-IR-139 CSN_369103 + CWI + DEC-MCS + ECMA-CYRILLIC + ISO-IR-153 GOST_19768-74 + GREEK-CCITT + GREEK7 + GREEK7-OLD + INIS + INIS-8 + INIS-CYRILLIC + ISO_2033 
ISO_2033-1983 + ISO_5427 + ISO_5427-EXT + #ISO_5428 Handling of combining marks is broken + ISO_10367-BOX + MAC-IS + MAC-UK + NATS-DANO + NATS-SEFI + WIN-SAMI-2 SAMI-WS2 + ISO-IR-197 + TIS-620 + KOI8-U + ISIRI-3342 + # + # Multibyte encodings come here + # + SJIS + #EUC-KR Charmap contains extraneous entries + CP949 + #JOHAB No charmap exists + BIG5 + #BIG5HKSCS Broken, please fix it + EUC-JP + EUC-CN GB2312 + #GBK Converter uses private area characters + EUC-TW + #GB18030 Broken, please fix it + # + # Stateful encodings not testable this way + # + #ISO-2022-JP + #ISO-2022-JP-2 + #ISO-2022-KR + #ISO-2022-CN + # +EOF +while read charset charmap; do + case ${charset} in \#*) continue;; esac + echo "Testing ${charset}" 1>&2 + ./tst-table.sh ${common_objpfx} ${objpfx} ${charset} ${charmap} \ + || { echo "failed: ./tst-table.sh ${common_objpfx} ${objpfx} ${charset} ${charmap}"; status=1; } +done + +exit $status diff --git a/linuxthreads/attr.c b/linuxthreads/attr.c index 90ab019837..ac3776a010 100644 --- a/linuxthreads/attr.c +++ b/linuxthreads/attr.c @@ -147,7 +147,7 @@ int __pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) size_t ps = __getpagesize (); /* First round up the guard size. */ - guardsize = roundup (guardsize, ps); + guardsize = page_roundup (guardsize, ps); /* The guard size must not be larger than the stack itself */ if (guardsize >= attr->__stacksize) return EINVAL; diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 23bba4d498..f4a38c977a 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,20 @@ +2000-09-03 Bruno Haible <haible@clisp.cons.org> + + * charmaps/EUC-TW: Add commented non-reversible mappings. + +2000-09-03 Bruno Haible <haible@clisp.cons.org> + + * charmaps/CP949: New file. + +2000-09-03 Bruno Haible <haible@clisp.cons.org> + + * charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..FF. 
+ +2000-09-03 Bruno Haible <haible@clisp.cons.org> + + * charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to + U+007E. + 2000-09-01 Ulrich Drepper <drepper@redhat.com> * locales/zh_HK: Use zh_TW data for LC_MESSAGES. diff --git a/sysdeps/ia64/Makefile b/sysdeps/ia64/Makefile index 5f04a9dd66..fcf62b8053 100644 --- a/sysdeps/ia64/Makefile +++ b/sysdeps/ia64/Makefile @@ -3,6 +3,6 @@ CPPFLAGS-start.S = -D__ASSEMBLY__ endif ifeq ($(subdir),elf) -dl-routines += dl-symaddr dl-fptr -rtld-routines += dl-symaddr dl-fptr +sysdep-dl-routines += dl-symaddr dl-fptr +sysdep_routines += $(sysdep-dl-routines) endif |