summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2000-08-20 20:35:53 +0000
committer Bruno Haible <bruno@clisp.org> 2000-08-20 20:35:53 +0000
commit ac65860e4b27fb1f332e0b5047ceedc1ab3e47b2 (patch)
tree 203971c95c07ba993e5afecf1a0b4a6c2c9649ce /tests
parent 3354d156d6fbebb692c9976f8d52b51ab60d7b66 (diff)
download gperf-ac65860e4b27fb1f332e0b5047ceedc1ab3e47b2.tar.gz
Add tests for 8-bit clean comparison and binary comparison.
Diffstat (limited to 'tests')
-rw-r--r-- tests/Makefile.in 19
-rw-r--r-- tests/lang-ucs2.exp 20
-rw-r--r-- tests/lang-ucs2.gperf 26
-rw-r--r-- tests/lang-ucs2.in bin 0 -> 276 bytes
-rw-r--r-- tests/lang-utf8.exp 20
-rw-r--r-- tests/lang-utf8.gperf 26
-rw-r--r-- tests/test2.c 45
7 files changed, 155 insertions, 1 deletions
diff --git a/tests/Makefile.in b/tests/Makefile.in
index e947a36..504dc81 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -55,7 +55,7 @@ installdirs :
uninstall :
-check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-test
+check : check-link-c check-link-c++ check-c check-ada check-modula3 check-pascal check-lang-utf8 check-lang-ucs2 check-test
@true
extracheck : @CHECK_LANG_SYNTAX@
@@ -101,6 +101,23 @@ check-pascal:
./pout -v < $(srcdir)/pascal.gperf > pascal.out
diff $(srcdir)/pascal.exp pascal.out
+# check for 8-bit cleanliness
+# Generates lu8inset.c from lang-utf8.gperf, links it with test.o into lu8out,
+# and feeds lu8out the keywords of that same .gperf file: the sed command
+# deletes the first 6 lines (the struct declaration and the %% separator) and
+# cuts every remaining line at its first comma, leaving only the keyword.
+# The run passes when the output is identical to lang-utf8.exp.
+# test.o is not built by this rule; it has to exist before the rule runs.
+check-lang-utf8:
+ $(GPERF) -k1 -t -I -K foreign_name < $(srcdir)/lang-utf8.gperf > lu8inset.c
+ $(CC) $(CFLAGS) -o lu8out lu8inset.c test.o
+ @echo "testing UTF-8 encoded languages, all items should be found in the set"
+ sed -e '1,6d' -e 's/,.*//' < $(srcdir)/lang-utf8.gperf | ./lu8out -v > lang-utf8.out
+ diff $(srcdir)/lang-utf8.exp lang-utf8.out
+
+# check for binary keywords with NUL bytes
+# Compiles test2.c, generates lu2inset.c from lang-ucs2.gperf, links both into
+# lu2out, and runs lu2out on the binary input file lang-ucs2.in.
+# The run passes when the output is identical to lang-ucs2.exp.
+# NOTE(review): -l presumably makes the generated code compare keyword lengths,
+# which keywords containing NUL bytes would need -- confirm in the gperf manual.
+check-lang-ucs2:
+ $(CC) -c $(CFLAGS) $(srcdir)/test2.c
+ $(GPERF) -k4 -t -l -I -K foreign_name < $(srcdir)/lang-ucs2.gperf > lu2inset.c
+ $(CC) $(CFLAGS) -o lu2out lu2inset.c test2.o
+ @echo "testing UCS-2 encoded languages, all items should be found in the set"
+ ./lu2out -v < $(srcdir)/lang-ucs2.in > lang-ucs2.out
+ diff $(srcdir)/lang-ucs2.exp lang-ucs2.out
+
# these next 5 are demos that show off the generated code
check-test:
$(GPERF) -L C -F ', 0, 0' -p -j1 -i 1 -g -o -t -G -N is_reserved_word -k1,3,'$$' < $(srcdir)/c-parse.gperf > c-parse.out
diff --git a/tests/lang-ucs2.exp b/tests/lang-ucs2.exp
new file mode 100644
index 0000000..28db537
--- /dev/null
+++ b/tests/lang-ucs2.exp
@@ -0,0 +1,20 @@
+in word set 12A0121B122D129B
+in word set 010D00650073006B0079
+in word set 00440061006E0073006B
+in word set 0045006E0067006C006900730068
+in word set 00530075006F006D0069
+in word set 004600720061006E00E7006100690073
+in word set 0044006500750074007300630068
+in word set 039503BB03BB03B703BD03B903BA03AC
+in word set 05E205D105E805D905EA
+in word set 004900740061006C00690061006E006F
+in word set 004E006F00720073006B
+in word set 0420044304410441043A04380439
+in word set 004500730070006100F1006F006C
+in word set 005300760065006E0073006B0061
+in word set 0E200E320E290E320E440E170E22
+in word set 005400FC0072006B00E70065
+in word set 005400691EBF006E00670020005600691EC70074
+in word set 65E5672C8A9E
+in word set 4E2D6587
+in word set D55CAE00
diff --git a/tests/lang-ucs2.gperf b/tests/lang-ucs2.gperf
new file mode 100644
index 0000000..8f2ac9e
--- /dev/null
+++ b/tests/lang-ucs2.gperf
@@ -0,0 +1,26 @@
+struct language {
+ const char *foreign_name;
+ const char *english_name;
+ const char *locale;
+};
+%%
+"\x12\xA0\x12\x1B\x12\x2D\x12\x9B", "Amharic", NULL
+"\x01\x0D\x00\x65\x00\x73\x00\x6B\x00\x79", "Czech", "cs_CZ.UTF-8"
+"\x00\x44\x00\x61\x00\x6E\x00\x73\x00\x6B", "Danish", "da_DK.UTF-8"
+"\x00\x45\x00\x6E\x00\x67\x00\x6C\x00\x69\x00\x73\x00\x68", "English", "en_GB.UTF-8"
+"\x00\x53\x00\x75\x00\x6F\x00\x6D\x00\x69", "Finnish", "fi_FI.UTF-8"
+"\x00\x46\x00\x72\x00\x61\x00\x6E\x00\xE7\x00\x61\x00\x69\x00\x73", "French", "fr_FR.UTF-8"
+"\x00\x44\x00\x65\x00\x75\x00\x74\x00\x73\x00\x63\x00\x68", "German", "de_DE.UTF-8"
+"\x03\x95\x03\xBB\x03\xBB\x03\xB7\x03\xBD\x03\xB9\x03\xBA\x03\xAC", "Greek", "el_GR.UTF-8"
+"\x05\xE2\x05\xD1\x05\xE8\x05\xD9\x05\xEA", "Hebrew", "he_IL.UTF-8"
+"\x00\x49\x00\x74\x00\x61\x00\x6C\x00\x69\x00\x61\x00\x6E\x00\x6F", "Italian", "it_IT.UTF-8"
+"\x00\x4E\x00\x6F\x00\x72\x00\x73\x00\x6B", "Norwegian", "no_NO.UTF-8"
+"\x04\x20\x04\x43\x04\x41\x04\x41\x04\x3A\x04\x38\x04\x39", "Russian", "ru_RU.UTF-8"
+"\x00\x45\x00\x73\x00\x70\x00\x61\x00\xF1\x00\x6F\x00\x6C", "Spanish", "es_ES.UTF-8"
+"\x00\x53\x00\x76\x00\x65\x00\x6E\x00\x73\x00\x6B\x00\x61", "Swedish", "sv_SE.UTF-8"
+"\x0E\x20\x0E\x32\x0E\x29\x0E\x32\x0E\x44\x0E\x17\x0E\x22", "Thai", "th_TH.UTF-8"
+"\x00\x54\x00\xFC\x00\x72\x00\x6B\x00\xE7\x00\x65", "Turkish", "tr_TR.UTF-8"
+"\x00\x54\x00\x69\x1E\xBF\x00\x6E\x00\x67\x00\x20\x00\x56\x00\x69\x1E\xC7\x00\x74", "Vietnamese", "vi_VN.UTF-8"
+"\x65\xE5\x67\x2C\x8A\x9E", "Japanese", "ja_JP.UTF-8"
+"\x4E\x2D\x65\x87", "Chinese", "zh_CN.UTF-8"
+"\xD5\x5C\xAE\x00", "Korean", "ko_KR.UTF-8"
diff --git a/tests/lang-ucs2.in b/tests/lang-ucs2.in
new file mode 100644
index 0000000..4766a7d
--- /dev/null
+++ b/tests/lang-ucs2.in
Binary files differ
diff --git a/tests/lang-utf8.exp b/tests/lang-utf8.exp
new file mode 100644
index 0000000..56d9572
--- /dev/null
+++ b/tests/lang-utf8.exp
@@ -0,0 +1,20 @@
+in word set አማርኛ
+in word set česky
+in word set Dansk
+in word set English
+in word set Suomi
+in word set Français
+in word set Deutsch
+in word set Ελληνικά
+in word set עברית
+in word set Italiano
+in word set Norsk
+in word set Русский
+in word set Español
+in word set Svenska
+in word set ภาษาไทย
+in word set Türkçe
+in word set Tiếng Việt
+in word set 日本語
+in word set 中文
+in word set 한글
diff --git a/tests/lang-utf8.gperf b/tests/lang-utf8.gperf
new file mode 100644
index 0000000..2beacdd
--- /dev/null
+++ b/tests/lang-utf8.gperf
@@ -0,0 +1,26 @@
+struct language {
+ const char *foreign_name;
+ const char *english_name;
+ const char *locale;
+};
+%%
+አማርኛ, "Amharic", NULL
+česky, "Czech", "cs_CZ.UTF-8"
+Dansk, "Danish", "da_DK.UTF-8"
+English, "English", "en_GB.UTF-8"
+Suomi, "Finnish", "fi_FI.UTF-8"
+Français, "French", "fr_FR.UTF-8"
+Deutsch, "German", "de_DE.UTF-8"
+Ελληνικά, "Greek", "el_GR.UTF-8"
+עברית, "Hebrew", "he_IL.UTF-8"
+Italiano, "Italian", "it_IT.UTF-8"
+Norsk, "Norwegian", "no_NO.UTF-8"
+Русский, "Russian", "ru_RU.UTF-8"
+Español, "Spanish", "es_ES.UTF-8"
+Svenska, "Swedish", "sv_SE.UTF-8"
+ภาษาไทย, "Thai", "th_TH.UTF-8"
+Türkçe, "Turkish", "tr_TR.UTF-8"
+Tiếng Việt, "Vietnamese", "vi_VN.UTF-8"
+日本語, "Japanese", "ja_JP.UTF-8"
+中文, "Chinese", "zh_CN.UTF-8"
+한글, "Korean", "ko_KR.UTF-8"
diff --git a/tests/test2.c b/tests/test2.c
new file mode 100644
index 0000000..635794b
--- /dev/null
+++ b/tests/test2.c
@@ -0,0 +1,45 @@
+/*
+ Tests the generated perfect hash function.
+ The -v option prints diagnostics as to whether a word is in
+ the set or not. Without -v the program is useful for timing.
+*/
+
+#include <stdio.h>
+
+#define MAX_LEN 80
+
+/* Reads lines of 2-byte characters (high byte first) from stdin and looks
+   each line up with in_word_set, which must be provided by the object this
+   file is linked with.  Every line is echoed as hexadecimal digits; when any
+   command line argument is given, the dump is prefixed with "in word set "
+   or "NOT in word set ".  Reading stops at end of input or at the first
+   empty line.  Always returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int verbose = argc > 1 ? 1 : 0;
+  char buf[2*MAX_LEN];
+  int buflen;
+
+  for (;;)
+    {
+      /* Simulate gets(buf) with 2 bytes per character.  Each character is
+         read into a scratch pair first, so that a line longer than MAX_LEN
+         characters is truncated instead of being written past the end of
+         buf.  */
+      char unit[2];
+      char *p = buf;
+      while (fread (unit, 2, 1, stdin) == 1)
+        {
+          /* The line ends at the 2-byte character 0x00 0x0A.  Comparing the
+             bytes one by one avoids shifting a possibly negative char.  */
+          if (unit[0] == 0 && unit[1] == '\n')
+            break;
+          if (p < buf + sizeof (buf))
+            {
+              p[0] = unit[0];
+              p[1] = unit[1];
+              p += 2;
+            }
+        }
+      buflen = p - buf;
+
+      /* An empty line, or end of input, terminates the run.  */
+      if (buflen == 0)
+        break;
+
+      if (in_word_set (buf, buflen) && verbose)
+        printf ("in word set ");
+      else if (verbose)
+        printf ("NOT in word set ");
+      /* The key is binary, so print it as two hex digits per byte.  */
+      for (p = buf; p < buf + buflen; p++)
+        printf ("%02X", (unsigned char) *p);
+      printf("\n");
+    }
+
+  return 0;
+}