summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Silverstone <dsilvers@netsurf-browser.org> 2010-12-04 15:28:50 +0000
committer Daniel Silverstone <dsilvers@netsurf-browser.org> 2010-12-04 15:28:50 +0000
commit 2fee3114b46682b220c73aae7c4f2466dedb81ff (patch)
tree d9dc3de599ec43fdf5cf70b59ac1fd049672914e
parent d485dbd52ebc6a911a5ddcf6891212fccb82e8c0 (diff)
download libparserutils-2fee3114b46682b220c73aae7c4f2466dedb81ff.tar.gz
Remove init/final code and turn aliases into static data structure. r=vince
svn path=/trunk/libparserutils/; revision=10961
-rw-r--r-- build/Aliases (renamed from test/data/Aliases) 0
-rw-r--r-- build/make-aliases.pl 124
-rw-r--r-- include/parserutils/parserutils.h 7
-rw-r--r-- src/Makefile 2
-rw-r--r-- src/charset/Makefile 10
-rw-r--r-- src/charset/aliases.c 459
-rw-r--r-- src/charset/aliases.h 14
-rw-r--r-- src/charset/charset.c 54
-rw-r--r-- src/charset/charset.h 24
-rw-r--r-- src/parserutils.c 54
-rw-r--r-- test/INDEX 2
-rw-r--r-- test/Makefile 4
-rw-r--r-- test/aliases.c 17
-rw-r--r-- test/charset.c 31
-rw-r--r-- test/cscodec-8859.c 7
-rw-r--r-- test/cscodec-ext8.c 7
-rw-r--r-- test/cscodec-utf16.c 7
-rw-r--r-- test/cscodec-utf8.c 7
-rw-r--r-- test/filter.c 6
-rw-r--r-- test/inputstream.c 6
-rw-r--r-- test/parserutils.c 30
-rw-r--r-- test/regression/INDEX 1
-rw-r--r-- test/regression/Makefile 2
-rw-r--r-- test/regression/cscodec-segv.c 38
-rw-r--r-- test/regression/filter-badenc-segv.c 5
-rw-r--r-- test/regression/filter-segv.c 5
-rw-r--r-- test/regression/stream-nomem.c 5
27 files changed, 207 insertions, 721 deletions
diff --git a/test/data/Aliases b/build/Aliases
index db61ff1..db61ff1 100644
--- a/test/data/Aliases
+++ b/build/Aliases
diff --git a/build/make-aliases.pl b/build/make-aliases.pl
new file mode 100644
index 0000000..f677c06
--- /dev/null
+++ b/build/make-aliases.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/perl -w
+# This file is part of LibParserUtils.
+# Licensed under the MIT License,
+# http://www.opensource.org/licenses/mit-license.php
+# Copyright 2010 Daniel Silverstone <dsilvers@netsurf-browser.org>
+# John-Mark Bell <jmb@netsurf-browser.org>
+
+use strict;
+
+use constant ALIAS_FILE => 'build/Aliases';
+use constant ALIAS_INC => 'src/charset/aliases.inc';
+
+use constant UNICODE_CHARSETS =>
+ [
+ qr'^ISO-10646-UCS-[24]$',
+ qr'^UTF-16',
+ qr'^UTF-8$',
+ qr'^UTF-32'
+ ];
+
+open(INFILE, "<", ALIAS_FILE) || die "Unable to open " . ALIAS_FILE;
+
+my %charsets;
+
+# Read the alias table: each entry is a canonical name, its MIB enum, then
+# any number of aliases, all separated by whitespace.
+while (my $line = <INFILE>) {
+	next if ($line =~ /^#/);
+	chomp $line;
+	# split ' ' discards leading whitespace and returns nothing for a blank
+	# or whitespace-only line, so no empty canonical name is ever recorded.
+	my @elements = split ' ', $line;
+	next unless (@elements);
+	my $canon = shift @elements;
+	my $mibenum = shift @elements;
+	# The MIB enum is written verbatim into the generated C table; an entry
+	# without one would produce uncompilable output, so stop here instead.
+	die "Missing MIB enum for '$canon' at " . ALIAS_FILE . " line $."
+		unless (defined $mibenum);
+	$charsets{$canon} = [$mibenum, \@elements];
+}
+
+close(INFILE);
+
+my $unicodeexp = "";
+
+# Fixed banner and opening of the canonical-name table. The heredoc is
+# single-quoted, so nothing inside it is interpolated.
+my $output = <<'EOH';
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2010 The NetSurf Project.
+ *
+ * Note: This file is automatically generated by make-aliases.pl
+ *
+ * Do not edit this file, changes will be overwritten during build.
+ */
+
+static parserutils_charset_aliases_canon canonical_charset_names[] = {
+EOH
+
+my %aliases;
+my $canonnr = 0;
+foreach my $canon (sort keys %charsets) {
+ my ($mibenum, $elements) = @{$charsets{$canon}};
+ # Ordering must match struct in src/charset/aliases.h
+ $output .= "\t{ " . $mibenum . ", " . length($canon) . ', "' . $canon . '" },' . "\n";
+ my $isunicode = 0;
+ foreach my $unirexp (@{UNICODE_CHARSETS()}) {
+ $isunicode = 1 if ($canon =~ $unirexp);
+ }
+ if ($isunicode == 1) {
+ $unicodeexp .= "((x) == $mibenum) || ";
+ }
+ $canon =~ y/A-Z/a-z/;
+ $canon =~ s/[^a-z0-9]//g;
+ $aliases{$canon} = $canonnr;
+ foreach my $alias (@$elements) {
+ $alias =~ y/A-Z/a-z/;
+ $alias =~ s/[^a-z0-9]//g;
+ $aliases{$alias} = $canonnr;
+ }
+ $canonnr += 1;
+}
+
+$output .= "};\n\nstatic const uint16_t charset_aliases_canon_count = ${canonnr};\n\n";
+
+$output .= <<'EOT';
+typedef struct {
+ uint16_t name_len;
+ const char *name;
+ parserutils_charset_aliases_canon *canon;
+} parserutils_charset_aliases_alias;
+
+static parserutils_charset_aliases_alias charset_aliases[] = {
+EOT
+
+my $aliascount = 0;
+
+foreach my $alias (sort keys %aliases) {
+ my $canonnr = $aliases{$alias};
+ $output .= "\t{ " . length($alias) . ', "' . $alias . '", &canonical_charset_names[' . $canonnr . "] },\n";
+ $aliascount += 1;
+}
+
+$output .= "};\n\n";
+
+# Drop the final " || ". If no canonical name matched UNICODE_CHARSETS the
+# expression is empty; use a constant-false test so the generated
+# MIBENUM_IS_UNICODE macro is still valid C.
+$unicodeexp =~ s/ \|\| \z//;
+$unicodeexp = "0" if ($unicodeexp eq "");
+
+$output .= <<"EOS";
+static const uint16_t charset_aliases_count = ${aliascount};
+
+#define MIBENUM_IS_UNICODE(x) ($unicodeexp)
+EOS
+
+# Skip the write when the existing file already holds exactly this content.
+if (open(EXISTING, "<", ALIAS_INC)) {
+	local $/ = undef();
+	my $now = <EXISTING>;
+	undef($output) if ($output eq $now);
+	close(EXISTING);
+}
+
+# Fail loudly if the include file cannot be (completely) written, rather
+# than leaving a missing or truncated file for the C build to trip over.
+if (defined($output)) {
+	open(OUTF, ">", ALIAS_INC) || die "Unable to write " . ALIAS_INC . ": $!";
+	print OUTF $output;
+	close(OUTF) || die "Unable to finish writing " . ALIAS_INC . ": $!";
+}
diff --git a/include/parserutils/parserutils.h b/include/parserutils/parserutils.h
index aa2b5ac..8518f42 100644
--- a/include/parserutils/parserutils.h
+++ b/include/parserutils/parserutils.h
@@ -17,13 +17,6 @@ extern "C"
#include <parserutils/functypes.h>
#include <parserutils/types.h>
-/* Initialise the ParserUtils library for use */
-parserutils_error parserutils_initialise(const char *aliases_file,
- parserutils_alloc alloc, void *pw);
-
-/* Clean up after ParserUtils */
-parserutils_error parserutils_finalise(parserutils_alloc alloc, void *pw);
-
#ifdef __cplusplus
}
#endif
diff --git a/src/Makefile b/src/Makefile
index 334dd43..3cbaf86 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,4 +1,2 @@
-# Sources
-DIR_SOURCES := parserutils.c
include build/makefiles/Makefile.subdir
diff --git a/src/charset/Makefile b/src/charset/Makefile
index a4c8f64..d851b8e 100644
--- a/src/charset/Makefile
+++ b/src/charset/Makefile
@@ -1,4 +1,12 @@
# Sources
-DIR_SOURCES := aliases.c charset.c codec.c
+DIR_SOURCES := aliases.c codec.c
+
+$(DIR)aliases.c: $(DIR)aliases.inc
+
+$(DIR)aliases.inc: build/make-aliases.pl build/Aliases
+ $(VQ)$(ECHO) " ALIAS: $@"
+ $(Q)$(PERL) build/make-aliases.pl
+
+CLEAN_ITEMS := $(CLEAN_ITEMS) $(DIR)aliases.inc
include build/makefiles/Makefile.subdir
diff --git a/src/charset/aliases.c b/src/charset/aliases.c
index 188a275..5c173d0 100644
--- a/src/charset/aliases.c
+++ b/src/charset/aliases.c
@@ -17,151 +17,77 @@
#include "charset/aliases.h"
#include "utils/utils.h"
-struct alias {
- struct alias *next;
- parserutils_charset_aliases_canon *canon;
- uint16_t name_len;
- char name[1];
-};
+/* Bring in the aliases tables */
+#include "aliases.inc"
-#define HASH_SIZE (43)
-static parserutils_charset_aliases_canon *canon_tab[HASH_SIZE];
-static struct alias *alias_tab[HASH_SIZE];
+typedef struct {
+ size_t slen;
+ const char *s;
+} lengthed_string;
-static parserutils_error parserutils_charset_create_alias(const char *alias,
- parserutils_charset_aliases_canon *c,
- parserutils_alloc alloc, void *pw);
-static parserutils_charset_aliases_canon *parserutils_charset_create_canon(
- const char *canon, uint16_t mibenum,
- parserutils_alloc alloc, void *pw);
-static int aliascmp(const char *s1, const char *s2, size_t s2_len);
-static uint32_t parserutils_charset_hash_val(const char *alias, size_t len);
-/**
- * Create alias data from Aliases file
- *
- * \param filename The path to the Aliases file
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, appropriate error otherwise.
- */
-parserutils_error parserutils_charset_aliases_create(const char *filename,
- parserutils_alloc alloc, void *pw)
-{
- char buf[300];
- FILE *fp;
-
- if (filename == NULL || alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- fp = fopen(filename, "r");
- if (fp == NULL)
- return PARSERUTILS_FILENOTFOUND;
-
- while (fgets(buf, sizeof buf, fp)) {
- char *p, *aliases = 0, *mib, *end;
- parserutils_charset_aliases_canon *cf;
-
- if (buf[0] == 0 || buf[0] == '#')
- /* skip blank lines or comments */
- continue;
-
- buf[strlen(buf) - 1] = 0; /* lose terminating newline */
- end = buf + strlen(buf);
-
- /* find end of canonical form */
- for (p = buf; *p && !isspace(*p) && !iscntrl(*p); p++)
- ; /* do nothing */
- if (p >= end)
- continue;
- *p++ = '\0'; /* terminate canonical form */
-
- /* skip whitespace */
- for (; *p && isspace(*p); p++)
- ; /* do nothing */
- if (p >= end)
- continue;
- mib = p;
-
- /* find end of mibenum */
- for (; *p && !isspace(*p) && !iscntrl(*p); p++)
- ; /* do nothing */
- if (p < end)
- *p++ = '\0'; /* terminate mibenum */
-
- cf = parserutils_charset_create_canon(buf, atoi(mib), alloc, pw);
- if (cf == NULL)
- continue;
-
- /* skip whitespace */
- for (; p < end && *p && isspace(*p); p++)
- ; /* do nothing */
- if (p >= end)
- continue;
- aliases = p;
-
- while (p < end) {
- /* find end of alias */
- for (; *p && !isspace(*p) && !iscntrl(*p); p++)
- ; /* do nothing */
- if (p > end)
- /* stop if we've gone past the end */
- break;
- /* terminate current alias */
- *p++ = '\0';
+#define IS_PUNCT_OR_SPACE(x) \
+ (!(((x) >= 'A' && (x) <= 'Z') || \
+ ((x) >= 'a' && (x) <= 'z') || \
+ ((x) >= '0' && (x) <= '9')))
- if (parserutils_charset_create_alias(aliases, cf,
- alloc, pw) != PARSERUTILS_OK)
- break;
- /* in terminating, we may have advanced
- * past the end - check this here */
- if (p >= end)
- break;
-
- /* skip whitespace */
- for (; *p && isspace(*p); p++)
- ; /* do nothing */
-
- if (p >= end)
- /* gone past end => stop */
- break;
-
- /* update pointer to current alias */
- aliases = p;
- }
- }
-
- fclose(fp);
-
- return PARSERUTILS_OK;
+/**
+ * bsearch() comparator: order a caller-supplied name against a table alias.
+ *
+ * Characters of the key that are not ASCII letters or digits are ignored
+ * and the rest is compared case-insensitively. The alias is compared as
+ * stored, byte for byte.
+ *
+ * \param a  Search key (lengthed_string)
+ * \param b  Table entry (parserutils_charset_aliases_alias)
+ * \return <0, 0 or >0 as the key sorts before, equal to or after the alias
+ */
+static int parserutils_charset_alias_match(const void *a, const void *b)
+{
+	const lengthed_string *s = a;
+	const parserutils_charset_aliases_alias *alias = b;
+	size_t key_left = s->slen;
+	size_t alias_left = alias->name_len;
+	const char *s_alias = alias->name;
+	const char *s_key = s->s;
+	int cmpret;
+
+	while ((key_left > 0) && (alias_left > 0)) {
+		while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) {
+			key_left--; s_key++;
+		}
+
+		if (key_left == 0)
+			break;
+
+		cmpret = tolower((unsigned char) *s_key) - *s_alias;
+
+		if (cmpret != 0) {
+			return cmpret;
+		}
+
+		key_left--;
+		s_key++;
+		alias_left--;
+		s_alias++;
+	}
+
+	/* Ignorable characters after the last significant one must not make
+	 * the key look longer than the alias (e.g. "UTF-8 " against "utf8"). */
+	while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) {
+		key_left--; s_key++;
+	}
+
+	/* At most one side has anything left here. Decide the order
+	 * explicitly instead of narrowing a size_t difference to int. */
+	if (key_left > 0)
+		return 1;
+
+	return (alias_left > 0) ? -1 : 0;
+}
/**
- * Free all alias data
+ * Retrieve the canonical form of an alias name
*
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data
+ * \param alias The alias name
+ * \param len The length of the alias name
+ * \return Pointer to canonical form or NULL if not found
*/
-void parserutils_charset_aliases_destroy(parserutils_alloc alloc, void *pw)
+parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise(
+ const char *alias, size_t len)
{
- parserutils_charset_aliases_canon *c, *d;
- struct alias *a, *b;
- int i;
-
- for (i = 0; i != HASH_SIZE; i++) {
- for (c = canon_tab[i]; c; c = d) {
- d = c->next;
- alloc(c, 0, pw);
- }
- canon_tab[i] = NULL;
-
- for (a = alias_tab[i]; a; a = b) {
- b = a->next;
- alloc(a, 0, pw);
- }
- alias_tab[i] = NULL;
- }
+ parserutils_charset_aliases_alias *c;
+ lengthed_string s = {len, alias};
+
+ c = (parserutils_charset_aliases_alias*)bsearch(&s,
+ &charset_aliases[0],
+ charset_aliases_count,
+ sizeof(parserutils_charset_aliases_alias),
+ parserutils_charset_alias_match);
+
+ if (c == NULL)
+ return NULL;
+
+ return c->canon;
}
/**
@@ -195,13 +121,14 @@ const char *parserutils_charset_mibenum_to_name(uint16_t mibenum)
{
int i;
parserutils_charset_aliases_canon *c;
-
- for (i = 0; i != HASH_SIZE; i++)
- for (c = canon_tab[i]; c; c = c->next)
- if (c->mib_enum == mibenum)
- return c->name;
-
- return NULL;
+
+ for (i = 0; i < charset_aliases_canon_count; ++i) {
+ c = &canonical_charset_names[i];
+ if (c->mib_enum == mibenum)
+ return c->name;
+ }
+
+ return NULL;
}
/**
@@ -212,253 +139,5 @@ const char *parserutils_charset_mibenum_to_name(uint16_t mibenum)
*/
bool parserutils_charset_mibenum_is_unicode(uint16_t mibenum)
{
- static uint16_t ucs4;
- static uint16_t ucs2;
- static uint16_t utf8;
- static uint16_t utf16;
- static uint16_t utf16be;
- static uint16_t utf16le;
- static uint16_t utf32;
- static uint16_t utf32be;
- static uint16_t utf32le;
-
- if (ucs4 == 0) {
- ucs4 = parserutils_charset_mibenum_from_name("UCS-4",
- SLEN("UCS-4"));
- ucs2 = parserutils_charset_mibenum_from_name("UCS-2",
- SLEN("UCS-2"));
- utf8 = parserutils_charset_mibenum_from_name("UTF-8",
- SLEN("UTF-8"));
- utf16 = parserutils_charset_mibenum_from_name("UTF-16",
- SLEN("UTF-16"));
- utf16be = parserutils_charset_mibenum_from_name("UTF-16BE",
- SLEN("UTF-16BE"));
- utf16le = parserutils_charset_mibenum_from_name("UTF-16LE",
- SLEN("UTF-16LE"));
- utf32 = parserutils_charset_mibenum_from_name("UTF-32",
- SLEN("UTF-32"));
- utf32be = parserutils_charset_mibenum_from_name("UTF-32BE",
- SLEN("UTF-32BE"));
- utf32le = parserutils_charset_mibenum_from_name("UTF-32LE",
- SLEN("UTF-32LE"));
- }
-
- return (mibenum == ucs4 || mibenum == ucs2 || mibenum == utf8 ||
- mibenum == utf16 || mibenum == utf16be ||
- mibenum == utf16le || mibenum == utf32 ||
- mibenum == utf32be || mibenum == utf32le);
-}
-
-#define IS_PUNCT_OR_SPACE(x) \
- ((0x09 <= (x) && (x) <= 0x0D) || \
- (0x20 <= (x) && (x) <= 0x2F) || \
- (0x3A <= (x) && (x) <= 0x40) || \
- (0x5B <= (x) && (x) <= 0x60) || \
- (0x7B <= (x) && (x) <= 0x7E))
-
-
-/**
- * Compare name "s1" to name "s2" (of size s2_len) case-insensitively
- * and ignoring ASCII punctuation characters.
- *
- * See http://www.whatwg.org/specs/web-apps/current-work/#character0
- *
- * \param s1 Alias to compare to
- * \param s2 Alias to compare
- * \param s2_len Length of "s2"
- * \returns 0 if equal, 1 otherwise
- */
-int aliascmp(const char *s1, const char *s2, size_t s2_len)
-{
- size_t s2_pos = 0;
-
- if (s1 == NULL || s2_len == 0)
- return 1;
-
- while (true) {
- while (IS_PUNCT_OR_SPACE(*s1))
- s1++;
- while (s2_pos < s2_len &&
- IS_PUNCT_OR_SPACE(s2[s2_pos])) {
- s2_pos++;
- }
-
- if (s2_pos == s2_len)
- return (*s1 != '\0') ? 1 : 0;
-
- if (tolower(*s1) != tolower(s2[s2_pos]))
- break;
- s1++;
- s2_pos++;
- }
-
- return 1;
-}
-
-
-/**
- * Retrieve the canonical form of an alias name
- *
- * \param alias The alias name
- * \param len The length of the alias name
- * \return Pointer to canonical form or NULL if not found
- */
-parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise(
- const char *alias, size_t len)
-{
- uint32_t hash;
- parserutils_charset_aliases_canon *c;
- struct alias *a;
-
- if (alias == NULL)
- return NULL;
-
- hash = parserutils_charset_hash_val(alias, len);
-
- for (c = canon_tab[hash]; c; c = c->next)
- if (aliascmp(c->name, alias, len) == 0)
- break;
- if (c)
- return c;
-
- for (a = alias_tab[hash]; a; a = a->next)
- if (aliascmp(a->name, alias, len) == 0)
- break;
- if (a)
- return a->canon;
-
- return NULL;
-}
-
-
-/**
- * Create an alias
- *
- * \param alias The alias name
- * \param c The canonical form
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, appropriate error otherwise
- */
-parserutils_error parserutils_charset_create_alias(const char *alias,
- parserutils_charset_aliases_canon *c,
- parserutils_alloc alloc, void *pw)
-{
- struct alias *a;
- uint32_t hash;
-
- if (alias == NULL || c == NULL || alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- a = alloc(NULL, sizeof(struct alias) + strlen(alias) + 1, pw);
- if (a == NULL)
- return PARSERUTILS_NOMEM;
-
- a->canon = c;
- a->name_len = strlen(alias);
- strcpy(a->name, alias);
- a->name[a->name_len] = '\0';
-
- hash = parserutils_charset_hash_val(alias, a->name_len);
-
- a->next = alias_tab[hash];
- alias_tab[hash] = a;
-
- return PARSERUTILS_OK;
-}
-
-/**
- * Create a canonical form
- *
- * \param canon The canonical name
- * \param mibenum The MIB enum value
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return Pointer to canonical form or NULL on error
- */
-parserutils_charset_aliases_canon *parserutils_charset_create_canon(
- const char *canon, uint16_t mibenum,
- parserutils_alloc alloc, void *pw)
-{
- parserutils_charset_aliases_canon *c;
- uint32_t hash, len;
-
- if (canon == NULL || alloc == NULL)
- return NULL;
-
- len = strlen(canon);
-
- c = alloc(NULL, sizeof(parserutils_charset_aliases_canon) + len + 1, pw);
- if (c == NULL)
- return NULL;
-
- c->mib_enum = mibenum;
- c->name_len = len;
- strcpy(c->name, canon);
- c->name[len] = '\0';
-
- hash = parserutils_charset_hash_val(canon, len);
-
- c->next = canon_tab[hash];
- canon_tab[hash] = c;
-
- return c;
-}
-
-/**
- * Hash function
- *
- * \param alias String to hash
- * \param len Number of bytes to hash (<= strlen(alias))
- * \return The hashed value
- */
-uint32_t parserutils_charset_hash_val(const char *alias, size_t len)
-{
- const char *s = alias;
- uint32_t h = 5381;
-
- if (alias == NULL)
- return 0;
-
- while (len--) {
- if (IS_PUNCT_OR_SPACE(*s)) {
- s++;
- } else {
- h = (h * 33) ^ (*s++ & ~0x20); /* case insensitive */
- }
- }
-
- return h % HASH_SIZE;
-}
-
-
-#ifndef NDEBUG
-/**
- * Dump all alias data to stdout
- */
-void parserutils_charset_aliases_dump(void)
-{
- parserutils_charset_aliases_canon *c;
- struct alias *a;
- int i;
- size_t size = 0;
-
- for (i = 0; i != HASH_SIZE; i++) {
- for (c = canon_tab[i]; c; c = c->next) {
- printf("%d %s\n", i, c->name);
- size += offsetof(parserutils_charset_aliases_canon,
- name) + c->name_len;
- }
-
- for (a = alias_tab[i]; a; a = a->next) {
- printf("%d %s\n", i, a->name);
- size += offsetof(struct alias, name) + a->name_len;
- }
- }
-
- size += (sizeof(canon_tab) / sizeof(canon_tab[0]));
- size += (sizeof(alias_tab) / sizeof(alias_tab[0]));
-
- printf("%u\n", (unsigned int) size);
+ return MIBENUM_IS_UNICODE(mibenum);
}
-#endif
diff --git a/src/charset/aliases.h b/src/charset/aliases.h
index 9abd2c8..189f8d5 100644
--- a/src/charset/aliases.h
+++ b/src/charset/aliases.h
@@ -13,24 +13,14 @@
#include <parserutils/charset/mibenum.h>
typedef struct parserutils_charset_aliases_canon {
- struct parserutils_charset_aliases_canon *next;
+ /* Do not change the ordering here without changing make-aliases.pl */
uint16_t mib_enum;
uint16_t name_len;
- char name[1];
+ const char *name;
} parserutils_charset_aliases_canon;
-/* Load encoding aliases from file */
-parserutils_error parserutils_charset_aliases_create(const char *filename,
- parserutils_alloc alloc, void *pw);
-/* Destroy encoding aliases */
-void parserutils_charset_aliases_destroy(parserutils_alloc alloc, void *pw);
-
/* Canonicalise an alias name */
parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise(
const char *alias, size_t len);
-#ifndef NDEBUG
-void parserutils_charset_aliases_dump(void);
-#endif
-
#endif
diff --git a/src/charset/charset.c b/src/charset/charset.c
deleted file mode 100644
index 3ef1a71..0000000
--- a/src/charset/charset.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * This file is part of LibParserUtils.
- * Licensed under the MIT License,
- * http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
- */
-
-#include "charset/aliases.h"
-#include "charset/charset.h"
-
-/**
- * Initialise the Charset library for use.
- *
- * This _must_ be called before using any libparserutils charset functions
- *
- * \param aliases_file Pointer to name of file containing encoding alias data
- * \param alloc Pointer to (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, applicable error otherwise.
- */
-parserutils_error parserutils_charset_initialise(const char *aliases_file,
- parserutils_alloc alloc, void *pw)
-{
- parserutils_error error;
-
- if (aliases_file == NULL || alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- error = parserutils_charset_aliases_create(aliases_file, alloc, pw);
- if (error != PARSERUTILS_OK)
- return error;
-
- return PARSERUTILS_OK;
-}
-
-/**
- * Clean up after Libparserutils
- *
- * \param alloc Pointer to (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, applicable error otherwise.
- */
-parserutils_error parserutils_charset_finalise(parserutils_alloc alloc,
- void *pw)
-{
- if (alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- parserutils_charset_aliases_destroy(alloc, pw);
-
- return PARSERUTILS_OK;
-}
-
-
diff --git a/src/charset/charset.h b/src/charset/charset.h
deleted file mode 100644
index 4b07577..0000000
--- a/src/charset/charset.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is part of LibParserUtils.
- * Licensed under the MIT License,
- * http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
- */
-
-#ifndef parserutils_charset_charset_h_
-#define parserutils_charset_charset_h_
-
-#include <parserutils/errors.h>
-#include <parserutils/functypes.h>
-#include <parserutils/types.h>
-
-/* Initialise the Charset library for use */
-parserutils_error parserutils_charset_initialise(const char *aliases_file,
- parserutils_alloc alloc, void *pw);
-
-/* Clean up after Charset */
-parserutils_error parserutils_charset_finalise(parserutils_alloc alloc,
- void *pw);
-
-#endif
-
diff --git a/src/parserutils.c b/src/parserutils.c
deleted file mode 100644
index ed9b21f..0000000
--- a/src/parserutils.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * This file is part of LibParserUtils.
- * Licensed under the MIT License,
- * http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
- */
-
-#include <parserutils/parserutils.h>
-
-#include "charset/charset.h"
-
-/**
- * Initialise the ParserUtils library for use.
- *
- * This _must_ be called before using any libparserutils functions
- *
- * \param aliases_file Pointer to name of file containing encoding alias data
- * \param alloc Pointer to (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, applicable error otherwise.
- */
-parserutils_error parserutils_initialise(const char *aliases_file,
- parserutils_alloc alloc, void *pw)
-{
- parserutils_error error;
-
- if (aliases_file == NULL || alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- error = parserutils_charset_initialise(aliases_file, alloc, pw);
- if (error != PARSERUTILS_OK)
- return error;
-
- return PARSERUTILS_OK;
-}
-
-/**
- * Clean up after Libparserutils
- *
- * \param alloc Pointer to (de)allocation function
- * \param pw Pointer to client-specific private data (may be NULL)
- * \return PARSERUTILS_OK on success, applicable error otherwise.
- */
-parserutils_error parserutils_finalise(parserutils_alloc alloc, void *pw)
-{
- if (alloc == NULL)
- return PARSERUTILS_BADPARM;
-
- parserutils_charset_finalise(alloc, pw);
-
- return PARSERUTILS_OK;
-}
-
-
diff --git a/test/INDEX b/test/INDEX
index 537124b..2ada1a8 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -2,8 +2,6 @@
#
# Test Description DataDir
-charset Charset initialisation/finalisation
-parserutils Library initialisation/finalisation
aliases Encoding alias handling
cscodec-utf8 UTF-8 charset codec implementation cscodec-utf8
cscodec-utf16 UTF-16 charset codec implementation cscodec-utf16
diff --git a/test/Makefile b/test/Makefile
index 4dc7c7b..afd38a7 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,7 +1,7 @@
# Tests
DIR_TEST_ITEMS := aliases:aliases.c cscodec-8859:cscodec-8859.c \
cscodec-ext8:cscodec-ext8.c cscodec-utf8:cscodec-utf8.c \
- cscodec-utf16:cscodec-utf16.c charset:charset.c filter:filter.c \
- inputstream:inputstream.c parserutils:parserutils.c
+ cscodec-utf16:cscodec-utf16.c filter:filter.c \
+ inputstream:inputstream.c
include build/makefiles/Makefile.subdir
diff --git a/test/aliases.c b/test/aliases.c
index b0e8e60..9a9f962 100644
--- a/test/aliases.c
+++ b/test/aliases.c
@@ -5,15 +5,6 @@
#include "testutils.h"
-extern void charset_aliases_dump(void);
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
int main (int argc, char **argv)
{
parserutils_charset_aliases_canon *c;
@@ -23,12 +14,6 @@ int main (int argc, char **argv)
return 1;
}
- parserutils_charset_aliases_create(argv[1], myrealloc, NULL);
-
-#ifndef NDEBUG
- parserutils_charset_aliases_dump();
-#endif
-
c = parserutils_charset_alias_canonicalise("moose", 5);
if (c) {
printf("FAIL - found invalid encoding 'moose'\n");
@@ -65,8 +50,6 @@ int main (int argc, char **argv)
return 1;
}
- parserutils_charset_aliases_destroy(myrealloc, NULL);
-
printf("PASS\n");
return 0;
diff --git a/test/charset.c b/test/charset.c
deleted file mode 100644
index a793e7e..0000000
--- a/test/charset.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "charset/charset.h"
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- if (argc != 2) {
- printf("Usage: %s <filename>\n", argv[0]);
- return 1;
- }
-
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
- assert (parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/cscodec-8859.c b/test/cscodec-8859.c
index 4d0c8b4..7fd5a2f 100644
--- a/test/cscodec-8859.c
+++ b/test/cscodec-8859.c
@@ -2,7 +2,6 @@
#include <stdio.h>
#include <string.h>
-#include "charset/charset.h"
#include <parserutils/charset/codec.h>
#include "utils/utils.h"
@@ -48,9 +47,6 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING);
@@ -92,9 +88,6 @@ int main(int argc, char **argv)
parserutils_charset_codec_destroy(ctx.codec);
- assert(parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/cscodec-ext8.c b/test/cscodec-ext8.c
index 4d0c8b4..7fd5a2f 100644
--- a/test/cscodec-ext8.c
+++ b/test/cscodec-ext8.c
@@ -2,7 +2,6 @@
#include <stdio.h>
#include <string.h>
-#include "charset/charset.h"
#include <parserutils/charset/codec.h>
#include "utils/utils.h"
@@ -48,9 +47,6 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING);
@@ -92,9 +88,6 @@ int main(int argc, char **argv)
parserutils_charset_codec_destroy(ctx.codec);
- assert(parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/cscodec-utf16.c b/test/cscodec-utf16.c
index e2f3c52..dd0ebb1 100644
--- a/test/cscodec-utf16.c
+++ b/test/cscodec-utf16.c
@@ -6,7 +6,6 @@
#include <arpa/inet.h>
#include <netinet/in.h>
-#include "charset/charset.h"
#include <parserutils/charset/codec.h>
#include "utils/utils.h"
@@ -51,9 +50,6 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING);
@@ -101,9 +97,6 @@ int main(int argc, char **argv)
parserutils_charset_codec_destroy(ctx.codec);
- assert(parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/cscodec-utf8.c b/test/cscodec-utf8.c
index 3e18e9e..b0dcb89 100644
--- a/test/cscodec-utf8.c
+++ b/test/cscodec-utf8.c
@@ -1,7 +1,6 @@
#include <stdio.h>
#include <string.h>
-#include "charset/charset.h"
#include <parserutils/charset/codec.h>
#include "utils/utils.h"
@@ -46,9 +45,6 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_charset_codec_create("NATS-SEFI-ADD",
myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING);
@@ -92,9 +88,6 @@ int main(int argc, char **argv)
parserutils_charset_codec_destroy(ctx.codec);
- assert(parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/filter.c b/test/filter.c
index 044a772..c66b684 100644
--- a/test/filter.c
+++ b/test/filter.c
@@ -32,10 +32,6 @@ int main(int argc, char **argv)
return 1;
}
- /* Initialise library */
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
/* Create input filter */
assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) ==
PARSERUTILS_OK);
@@ -349,8 +345,6 @@ int main(int argc, char **argv)
/* Clean up */
parserutils_filter_destroy(input);
- assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/inputstream.c b/test/inputstream.c
index ec72629..99d3a3d 100644
--- a/test/inputstream.c
+++ b/test/inputstream.c
@@ -36,10 +36,6 @@ int main(int argc, char **argv)
return 1;
}
- /* Initialise library */
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_inputstream_create("UTF-8", 1, NULL,
myrealloc, NULL, &stream) == PARSERUTILS_OK);
@@ -94,8 +90,6 @@ int main(int argc, char **argv)
parserutils_inputstream_destroy(stream);
- assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/parserutils.c b/test/parserutils.c
deleted file mode 100644
index c6d671a..0000000
--- a/test/parserutils.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <parserutils/parserutils.h>
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- if (argc != 2) {
- printf("Usage: %s <filename>\n", argv[0]);
- return 1;
- }
-
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
- assert (parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/regression/INDEX b/test/regression/INDEX
index 135e761..f6de6cf 100644
--- a/test/regression/INDEX
+++ b/test/regression/INDEX
@@ -2,7 +2,6 @@
#
# Test Description DataDir
-cscodec-segv Segfault in charset codecs
filter-segv Segfault in input filtering
stream-nomem Inputstream buffer expansion
filter-badenc-segv Segfault on resetting bad encoding in filter
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 0799ea4..2cdeaf7 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -1,5 +1,5 @@
# Tests
-DIR_TEST_ITEMS := cscodec-segv:cscodec-segv.c filter-segv:filter-segv.c \
+DIR_TEST_ITEMS := filter-segv:filter-segv.c \
stream-nomem:stream-nomem.c filter-badenc-segv:filter-badenc-segv.c
CFLAGS := $(CFLAGS) -I$(CURDIR)/test
diff --git a/test/regression/cscodec-segv.c b/test/regression/cscodec-segv.c
deleted file mode 100644
index 0cf9f69..0000000
--- a/test/regression/cscodec-segv.c
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <stdio.h>
-
-#include "charset/charset.h"
-#include <parserutils/charset/codec.h>
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- parserutils_charset_codec *codec;
-
- if (argc != 2) {
- printf("Usage: %s <aliases_file>\n", argv[0]);
- return 1;
- }
-
- assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
- assert(parserutils_charset_codec_create("UTF-8", myrealloc, NULL,
- &codec) == PARSERUTILS_OK);
-
- parserutils_charset_codec_destroy(codec);
-
- assert(parserutils_charset_finalise(myrealloc, NULL) ==
- PARSERUTILS_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/regression/filter-badenc-segv.c b/test/regression/filter-badenc-segv.c
index 09ae864..0c9877a 100644
--- a/test/regression/filter-badenc-segv.c
+++ b/test/regression/filter-badenc-segv.c
@@ -31,9 +31,6 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) ==
PARSERUTILS_OK);
@@ -49,8 +46,6 @@ int main(int argc, char **argv)
parserutils_filter_destroy(input);
- assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/regression/filter-segv.c b/test/regression/filter-segv.c
index a884b6d..58905f9 100644
--- a/test/regression/filter-segv.c
+++ b/test/regression/filter-segv.c
@@ -23,16 +23,11 @@ int main(int argc, char **argv)
return 1;
}
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) ==
PARSERUTILS_OK);
parserutils_filter_destroy(input);
- assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
printf("PASS\n");
return 0;
diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c
index fc8d514..2f7707e 100644
--- a/test/regression/stream-nomem.c
+++ b/test/regression/stream-nomem.c
@@ -49,9 +49,6 @@ int main(int argc, char **argv)
input_buffer[BUFFER_SIZE - 7] = '2';
input_buffer[BUFFER_SIZE - 8] = '1';
- assert(parserutils_initialise(argv[1], myrealloc, NULL) ==
- PARSERUTILS_OK);
-
assert(parserutils_inputstream_create("UTF-8", 0,
NULL, myrealloc, NULL, &stream) == PARSERUTILS_OK);
@@ -84,8 +81,6 @@ int main(int argc, char **argv)
parserutils_inputstream_destroy(stream);
- assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK);
-
printf("PASS\n");
return 0;