diff options
author | Bruno Haible <bruno@clisp.org> | 2000-08-20 20:35:53 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2000-08-20 20:35:53 +0000 |
commit | ac65860e4b27fb1f332e0b5047ceedc1ab3e47b2 (patch) | |
tree | 203971c95c07ba993e5afecf1a0b4a6c2c9649ce /tests | |
parent | 3354d156d6fbebb692c9976f8d52b51ab60d7b66 (diff) | |
download | gperf-ac65860e4b27fb1f332e0b5047ceedc1ab3e47b2.tar.gz |
Add tests for 8-bit clean comparison and binary comparison.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/Makefile.in | 19 | ||||
-rw-r--r-- | tests/lang-ucs2.exp | 20 | ||||
-rw-r--r-- | tests/lang-ucs2.gperf | 26 | ||||
-rw-r--r-- | tests/lang-ucs2.in | bin | 0 -> 276 bytes | |||
-rw-r--r-- | tests/lang-utf8.exp | 20 | ||||
-rw-r--r-- | tests/lang-utf8.gperf | 26 | ||||
-rw-r--r-- | tests/test2.c | 45 |
7 files changed, 155 insertions, 1 deletions
diff --git a/tests/Makefile.in b/tests/Makefile.in index e947a36..504dc81 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -55,7 +55,7 @@ installdirs : uninstall : -check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-test +check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-lang-utf8 check-lang-ucs2 check-test @true extracheck : @CHECK_LANG_SYNTAX@ @@ -101,6 +101,23 @@ check-pascal: ./pout -v < $(srcdir)/pascal.gperf > pascal.out diff $(srcdir)/pascal.exp pascal.out +# check for 8-bit cleanliness +check-lang-utf8: + $(GPERF) -k1 -t -I -K foreign_name < $(srcdir)/lang-utf8.gperf > lu8inset.c + $(CC) $(CFLAGS) -o lu8out lu8inset.c test.o + @echo "testing UTF-8 encoded languages, all items should be found in the set" + sed -e '1,6d' -e 's/,.*//' < $(srcdir)/lang-utf8.gperf | ./lu8out -v > lang-utf8.out + diff $(srcdir)/lang-utf8.exp lang-utf8.out + +# check for binary keywords with NUL bytes +check-lang-ucs2: + $(CC) -c $(CFLAGS) $(srcdir)/test2.c + $(GPERF) -k4 -t -l -I -K foreign_name < $(srcdir)/lang-ucs2.gperf > lu2inset.c + $(CC) $(CFLAGS) -o lu2out lu2inset.c test2.o + @echo "testing UCS-2 encoded languages, all items should be found in the set" + ./lu2out -v < $(srcdir)/lang-ucs2.in > lang-ucs2.out + diff $(srcdir)/lang-ucs2.exp lang-ucs2.out + # these next 5 are demos that show off the generated code check-test: $(GPERF) -L C -F ', 0, 0' -p -j1 -i 1 -g -o -t -G -N is_reserved_word -k1,3,'$$' < $(srcdir)/c-parse.gperf > c-parse.out diff --git a/tests/lang-ucs2.exp b/tests/lang-ucs2.exp new file mode 100644 index 0000000..28db537 --- /dev/null +++ b/tests/lang-ucs2.exp @@ -0,0 +1,20 @@ +in word set 12A0121B122D129B +in word set 010D00650073006B0079 +in word set 00440061006E0073006B +in word set 0045006E0067006C006900730068 +in word set 00530075006F006D0069 +in word set 004600720061006E00E7006100690073 +in word set 0044006500750074007300630068 +in word set 
039503BB03BB03B703BD03B903BA03AC +in word set 05E205D105E805D905EA +in word set 004900740061006C00690061006E006F +in word set 004E006F00720073006B +in word set 0420044304410441043A04380439 +in word set 004500730070006100F1006F006C +in word set 005300760065006E0073006B0061 +in word set 0E200E320E290E320E440E170E22 +in word set 005400FC0072006B00E70065 +in word set 005400691EBF006E00670020005600691EC70074 +in word set 65E5672C8A9E +in word set 4E2D6587 +in word set D55CAE00 diff --git a/tests/lang-ucs2.gperf b/tests/lang-ucs2.gperf new file mode 100644 index 0000000..8f2ac9e --- /dev/null +++ b/tests/lang-ucs2.gperf @@ -0,0 +1,26 @@ +struct language { + const char *foreign_name; + const char *english_name; + const char *locale; +}; +%% +"\x12\xA0\x12\x1B\x12\x2D\x12\x9B", "Amharic", NULL +"\x01\x0D\x00\x65\x00\x73\x00\x6B\x00\x79", "Czech", "cs_CZ.UTF-8" +"\x00\x44\x00\x61\x00\x6E\x00\x73\x00\x6B", "Danish", "da_DK.UTF-8" +"\x00\x45\x00\x6E\x00\x67\x00\x6C\x00\x69\x00\x73\x00\x68", "English", "en_GB.UTF-8" +"\x00\x53\x00\x75\x00\x6F\x00\x6D\x00\x69", "Finnish", "fi_FI.UTF-8" +"\x00\x46\x00\x72\x00\x61\x00\x6E\x00\xE7\x00\x61\x00\x69\x00\x73", "French", "fr_FR.UTF-8" +"\x00\x44\x00\x65\x00\x75\x00\x74\x00\x73\x00\x63\x00\x68", "German", "de_DE.UTF-8" +"\x03\x95\x03\xBB\x03\xBB\x03\xB7\x03\xBD\x03\xB9\x03\xBA\x03\xAC", "Greek", "el_GR.UTF-8" +"\x05\xE2\x05\xD1\x05\xE8\x05\xD9\x05\xEA", "Hebrew", "he_IL.UTF-8" +"\x00\x49\x00\x74\x00\x61\x00\x6C\x00\x69\x00\x61\x00\x6E\x00\x6F", "Italian", "it_IT.UTF-8" +"\x00\x4E\x00\x6F\x00\x72\x00\x73\x00\x6B", "Norwegian", "no_NO.UTF-8" +"\x04\x20\x04\x43\x04\x41\x04\x41\x04\x3A\x04\x38\x04\x39", "Russian", "ru_RU.UTF-8" +"\x00\x45\x00\x73\x00\x70\x00\x61\x00\xF1\x00\x6F\x00\x6C", "Spanish", "es_ES.UTF-8" +"\x00\x53\x00\x76\x00\x65\x00\x6E\x00\x73\x00\x6B\x00\x61", "Swedish", "sv_SE.UTF-8" +"\x0E\x20\x0E\x32\x0E\x29\x0E\x32\x0E\x44\x0E\x17\x0E\x22", "Thai", "th_TH.UTF-8" +"\x00\x54\x00\xFC\x00\x72\x00\x6B\x00\xE7\x00\x65", "Turkish", 
"tr_TR.UTF-8" +"\x00\x54\x00\x69\x1E\xBF\x00\x6E\x00\x67\x00\x20\x00\x56\x00\x69\x1E\xC7\x00\x74", "Vietnamese", "vi_VN.UTF-8" +"\x65\xE5\x67\x2C\x8A\x9E", "Japanese", "ja_JP.UTF-8" +"\x4E\x2D\x65\x87", "Chinese", "zh_CN.UTF-8" +"\xD5\x5C\xAE\x00", "Korean", "ko_KR.UTF-8" diff --git a/tests/lang-ucs2.in b/tests/lang-ucs2.in Binary files differ new file mode 100644 index 0000000..4766a7d --- /dev/null +++ b/tests/lang-ucs2.in diff --git a/tests/lang-utf8.exp b/tests/lang-utf8.exp new file mode 100644 index 0000000..56d9572 --- /dev/null +++ b/tests/lang-utf8.exp @@ -0,0 +1,20 @@ +in word set አማርኛ +in word set česky +in word set Dansk +in word set English +in word set Suomi +in word set Français +in word set Deutsch +in word set Ελληνικά +in word set עברית +in word set Italiano +in word set Norsk +in word set Русский +in word set Español +in word set Svenska +in word set ภาษาไทย +in word set Türkçe +in word set Tiếng Việt +in word set 日本語 +in word set 中文 +in word set 한글 diff --git a/tests/lang-utf8.gperf b/tests/lang-utf8.gperf new file mode 100644 index 0000000..2beacdd --- /dev/null +++ b/tests/lang-utf8.gperf @@ -0,0 +1,26 @@ +struct language { + const char *foreign_name; + const char *english_name; + const char *locale; +}; +%% +አማርኛ, "Amharic", NULL +česky, "Czech", "cs_CZ.UTF-8" +Dansk, "Danish", "da_DK.UTF-8" +English, "English", "en_GB.UTF-8" +Suomi, "Finnish", "fi_FI.UTF-8" +Français, "French", "fr_FR.UTF-8" +Deutsch, "German", "de_DE.UTF-8" +Ελληνικά, "Greek", "el_GR.UTF-8" +עברית, "Hebrew", "he_IL.UTF-8" +Italiano, "Italian", "it_IT.UTF-8" +Norsk, "Norwegian", "no_NO.UTF-8" +Русский, "Russian", "ru_RU.UTF-8" +Español, "Spanish", "es_ES.UTF-8" +Svenska, "Swedish", "sv_SE.UTF-8" +ภาษาไทย, "Thai", "th_TH.UTF-8" +Türkçe, "Turkish", "tr_TR.UTF-8" +Tiếng Việt, "Vietnamese", "vi_VN.UTF-8" +日本語, "Japanese", "ja_JP.UTF-8" +中文, "Chinese", "zh_CN.UTF-8" +한글, "Korean", "ko_KR.UTF-8" diff --git a/tests/test2.c b/tests/test2.c new file mode 100644 index 0000000..635794b 
--- /dev/null +++ b/tests/test2.c @@ -0,0 +1,45 @@ +/* + Tests the generated perfect hash function. + The -v option prints diagnostics as to whether a word is in + the set or not. Without -v the program is useful for timing. +*/ + +#include <stdio.h> + +#define MAX_LEN 80 + +int +main (argc, argv) + int argc; + char *argv[]; +{ + int verbose = argc > 1 ? 1 : 0; + char buf[2*MAX_LEN]; + int buflen; + + for (;;) + { + /* Simulate gets(buf) with 2 bytes per character. */ + char *p = buf; + while (fread (p, 2, 1, stdin) == 1) + { + if ((p[0] << 8) + p[1] == '\n') + break; + p += 2; + } + buflen = p - buf; + + if (buflen == 0) + break; + + if (in_word_set (buf, buflen) && verbose) + printf ("in word set "); + else if (verbose) + printf ("NOT in word set "); + for (p = buf; p < buf + buflen; p++) + printf ("%02X", (unsigned char) *p); + printf("\n"); + } + + return 0; +} |