summaryrefslogtreecommitdiff
path: root/src/raptor_nfc_test.c
diff options
context:
space:
mode:
authorDave Beckett <dave@dajobe.org>2004-08-11 22:16:38 +0000
committerDave Beckett <dave@dajobe.org>2004-08-11 22:16:38 +0000
commit445b91bf24f754d5055548d517fc9391b06693fc (patch)
tree2430fff82d630a2e4ae33812a5e6cb2c79b15153 /src/raptor_nfc_test.c
parenta2c6848588cfa1de2cba795ad370e5e15e94fc46 (diff)
downloadraptor-445b91bf24f754d5055548d517fc9391b06693fc.tar.gz
Raptor NFC test
Diffstat (limited to 'src/raptor_nfc_test.c')
-rw-r--r--src/raptor_nfc_test.c243
1 files changed, 243 insertions, 0 deletions
diff --git a/src/raptor_nfc_test.c b/src/raptor_nfc_test.c
new file mode 100644
index 00000000..eab50933
--- /dev/null
+++ b/src/raptor_nfc_test.c
@@ -0,0 +1,243 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_nfc_test.c - Raptor Unicode NFC validation check
+ *
+ * $Id$
+ *
+ * Copyright (C) 2004 David Beckett - http://purl.org/net/dajobe/
+ * Institute for Learning and Research Technology - http://www.ilrt.org/
+ * University of Bristol - http://www.bristol.ac.uk/
+ *
+ * This package is Free Software or Open Source available under the
+ * following licenses (these are alternatives):
+ * 1. GNU Lesser General Public License (LGPL)
+ * 2. GNU General Public License (GPL)
+ * 3. Mozilla Public License (MPL)
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * full license terms.
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#ifdef WIN32
+#include <win32_raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h> /* for isprint() */
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+/* Raptor includes */
+#include "raptor.h"
+#include "raptor_internal.h"
+#include "raptor_nfc.h"
+
+
+#undef RAPTOR_NFC_DECODE_DEBUG
+
+
+/**
+ * utf8_string: destination utf8 buffer (FIXME big enough!)
+ * unicode_string: first char of string
+ * end: last char of unicode_string
+ */
+static int
+decode_to_utf8(unsigned char *utf8_string, size_t utf8_string_length,
+ const char *unicode_string, const char *end)
+{
+ unsigned char *u=utf8_string;
+ const char *p=unicode_string;
+
+#ifdef RAPTOR_NFC_DECODE_DEBUG
+ fputs("decode_to_utf8: string '", stderr);
+ fwrite(unicode_string, sizeof(char), (end-unicode_string)+1, stderr);
+ fputs("' converts to:\n ", stderr);
+#endif
+
+ while(p < end) {
+ unsigned long c=0;
+ char *endptr;
+
+ if(*p == ' ') {
+ p++;
+ continue;
+ }
+
+ c=(unsigned long)strtol(p, &endptr, 16);
+
+#ifdef RAPTOR_NFC_DECODE_DEBUG
+ fprintf(stderr, "U+%04lX ", c);
+#endif
+
+ p=(unsigned char*)endptr;
+
+ u+= raptor_unicode_char_to_utf8(c, u);
+
+ if((u-utf8_string) > utf8_string_length) {
+ fprintf(stderr,
+ "decode_to_utf8 overwote utf8_string buffer at byte %d\n",
+ (u-utf8_string));
+ abort();
+ }
+ }
+
+#ifdef RAPTOR_NFC_DECODE_DEBUG
+ fputs("\n", stderr);
+#endif
+
+ return u-utf8_string;
+}
+
+
+
+static void
+utf8_print(const unsigned char *input, int length, FILE *stream)
+{
+ int i=0;
+
+ while(i<length && *input) {
+ unsigned long c;
+ int size=raptor_utf8_to_unicode_char(&c, input, length-i);
+ if(size <= 0)
+ return;
+ if(i)
+ fputc(' ', stream);
+ fprintf(stream, "U+%04X", (int)c);
+ input += size;
+ i += size;
+ }
+}
+
+
+int
+main (int argc, char *argv[])
+{
+ char *program;
+ static const char *filename="NormalizationTest.txt";
+ FILE *fh;
+ int rc=0;
+ unsigned int line=1;
+ size_t max_c2_len=0;
+ size_t max_c4_len=0;
+ int passes=0;
+ int fails=0;
+
+ program=argv[0];
+ if(1) {
+ char *p;
+ if((p=strrchr(program, '/')))
+ program=p+1;
+ else if((p=strrchr(program, '\\')))
+ program=p+1;
+ argv[0]=program;
+ }
+
+
+ fh=fopen(filename, "r");
+ if(!fh) {
+ fprintf(stderr, "%s: file '%s' open failed - %s\n",
+ program, filename, strerror(errno));
+ return 1;
+ }
+
+#define LINE_BUFFER_SIZE 1024
+
+/* FIXME big enough for Unicode 4 (c2 max 16; c4 max 33) */
+#define UNISTR_SIZE 40
+
+ for(;!feof(fh); line++) {
+ char buffer[LINE_BUFFER_SIZE];
+ char *p, *start;
+ unsigned char nfc1[UNISTR_SIZE];
+ unsigned char nfc2[UNISTR_SIZE];
+ size_t nfc1_len, nfc2_len;
+ int nfc_rc;
+ int error;
+
+ p=fgets(buffer, LINE_BUFFER_SIZE, fh);
+ if(!p) {
+ if(ferror(fh)) {
+ fprintf(stderr, "%s: file '%s' read failed - %s\n",
+ program, filename, strerror(errno));
+ rc=1;
+ break;
+ }
+ /* assume feof */
+ break;
+ };
+
+#if 0
+ fprintf(stderr, "%s:%d: line '%s'\n", program, line, buffer);
+#endif
+
+ /* skip lines */
+ if(*p == '@' || *p == '#')
+ continue;
+
+ /* skip column 1 */
+ while(*p++ != ';')
+ ;
+
+ start=p;
+ /* find end column 2 */
+ while(*p++ != ';')
+ ;
+
+ nfc1_len=decode_to_utf8(nfc1, UNISTR_SIZE, start, p-1);
+ if(nfc1_len > max_c2_len)
+ max_c2_len=nfc1_len;
+
+ /* skip column 3 */
+ while(*p++ != ';')
+ ;
+
+ start=p;
+ /* find end column 4 */
+ while(*p++ != ';')
+ ;
+
+ nfc2_len=decode_to_utf8(nfc2, UNISTR_SIZE, start, p-1);
+ if(nfc2_len > max_c4_len)
+ max_c4_len=nfc2_len;
+
+ nfc_rc=raptor_nfc_check(nfc1, nfc1_len, &error);
+ if(!nfc_rc) {
+ fprintf(stderr, "%s:%d: NFC check 1 failed on: '", filename, line);
+ utf8_print(nfc1, nfc1_len, stderr);
+ fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc1_len);
+ fails++;
+ } else
+ passes++;
+
+ if(nfc1_len == nfc2_len && !memcmp(nfc1, nfc2, nfc1_len))
+ continue;
+
+ nfc_rc=raptor_nfc_check(nfc2, nfc2_len, &error);
+ if(!nfc_rc) {
+ fprintf(stderr, "%s:%d: NFC check 2 failed on: '", filename, line);
+ utf8_print(nfc2, nfc2_len, stderr);
+ fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc2_len);
+ fails++;
+ } else
+ passes++;
+
+ }
+
+ fclose(fh);
+
+ fprintf(stderr, "%s: max c2 len: %d, max c4 len: %d\n", program,
+ (int)max_c2_len, (int)max_c4_len);
+ fprintf(stderr, "%s: passes: %d fails: %d\n", program,
+ passes, fails);
+
+ return rc;
+}