summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2020-03-20 18:09:59 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2020-03-20 18:09:59 +0000
commit 2ec85e0009fc1808ed79b4697e8502795b46564b (patch)
tree 8b7b8eb19fe4feecbd1f0fb9fed718d5c523259d /src
parent 9273b7d54f872ede1a3c77d628495065a4bfa206 (diff)
download pcre2-2ec85e0009fc1808ed79b4697e8502795b46564b.tar.gz
Renamed dftables as pcre2_dftables and enabled it to write the tables in binary.
Updated documentation about character tables.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1237 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src')
-rw-r--r-- src/pcre2.h.in 1
-rw-r--r-- src/pcre2_chartables.c.dist 38
-rw-r--r-- src/pcre2_compile.c 10
-rw-r--r-- src/pcre2_config.c 10
-rw-r--r-- src/pcre2_dftables.c (renamed from src/dftables.c) 204
-rw-r--r-- src/pcre2_internal.h 4
-rw-r--r-- src/pcre2_maketables.c 64
-rw-r--r-- src/pcre2_serialize.c 16
-rw-r--r-- src/pcre2test.c 56
9 files changed, 271 insertions, 132 deletions
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 8ccaed1..4fd6a1e 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -448,6 +448,7 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
+#define PCRE2_CONFIG_TABLES_LENGTH 15
/* Types for code units in patterns and subject strings. */
diff --git a/src/pcre2_chartables.c.dist b/src/pcre2_chartables.c.dist
index 0e07edb..861914d 100644
--- a/src/pcre2_chartables.c.dist
+++ b/src/pcre2_chartables.c.dist
@@ -2,17 +2,21 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* This file was automatically written by the dftables auxiliary
+/* This file was automatically written by the pcre2_dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256. */
-/*The dftables program (which is distributed with PCRE2) can be used to
-build alternative versions of this file. This is necessary if you are
+/* This set of tables was written in the C locale. */
+
+/* The pcre2_dftables program (which is distributed with PCRE2) can be used
+to build alternative versions of this file. This is necessary if you are
running in an EBCDIC environment, or if you want to default to a different
-encoding, for example ISO-8859-1. When dftables is run, it creates these
-tables in the current locale. This happens automatically if PCRE2 is
-configured with --enable-rebuild-chartables. */
+encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
+these tables in the "C" locale by default. This happens automatically if
+PCRE2 is configured with --enable-rebuild-chartables. However, you can run
+pcre2_dftables manually with the -L option to build tables using the LC_ALL
+locale. */
/* The following #include is present because without it gcc 4.x may remove
the array definition from the final binary if PCRE2 is built into a static
@@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph print, punct, and cntrl. Other classes are built from combinations. */
+graph, print, punct, and cntrl. Other classes are built from combinations. */
- 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
+ 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
+ 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 9f05d19..62393be 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1202,7 +1202,7 @@ in the decoded tables. */
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
(*ref_count)++;
}
@@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
memcpy(newcode, code, code->blocksize);
newcode->executable_jit = NULL;
-newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
+newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
code->memctl.memory_data);
if (newtables == NULL)
{
code->memctl.free((void *)newcode, code->memctl.memory_data);
return NULL;
}
-memcpy(newtables, code->tables, tables_length);
-ref_count = (PCRE2_SIZE *)(newtables + tables_length);
+memcpy(newtables, code->tables, TABLES_LENGTH);
+ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
*ref_count = 1;
newcode->tables = newtables;
@@ -1270,7 +1270,7 @@ if (code != NULL)
be freed when there are no more references to them. The *ref_count should
always be > 0. */
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
if (*ref_count > 0)
{
(*ref_count)--;
diff --git a/src/pcre2_config.c b/src/pcre2_config.c
index e487b10..5ef103c 100644
--- a/src/pcre2_config.c
+++ b/src/pcre2_config.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
-its value gets changed by pcre2_internal.h to be in code units. */
+its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
+be in code units. */
static int configured_link_size = LINK_SIZE;
@@ -94,6 +95,7 @@ if (where == NULL) /* Requests a length */
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
+ case PCRE2_CONFIG_TABLES_LENGTH:
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
@@ -191,6 +193,10 @@ switch (what)
*((uint32_t *)where) = 0;
break;
+ case PCRE2_CONFIG_TABLES_LENGTH:
+ *((uint32_t *)where) = TABLES_LENGTH;
+ break;
+
case PCRE2_CONFIG_UNICODE_VERSION:
{
#if defined SUPPORT_UNICODE
diff --git a/src/dftables.c b/src/pcre2_dftables.c
index 02796cc..17ebd89 100644
--- a/src/dftables.c
+++ b/src/pcre2_dftables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -40,9 +40,12 @@ POSSIBILITY OF SUCH DAMAGE.
/* This is a freestanding support program to generate a file containing
-character tables for PCRE2. The tables are built according to the current
-locale using the pcre2_maketables() function, which is part of the PCRE2 API.
-*/
+character tables for PCRE2. The tables are built using the pcre2_maketables()
+function, which is part of the PCRE2 API. By default, the system's "C" locale
+is used rather than what the building user happens to have set, but the -L
+option can be used to select the current locale from the LC_ALL environment
+variable. By default, the tables are written in source form, but if -b is
+given, they are written in binary. */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -56,31 +59,88 @@ locale using the pcre2_maketables() function, which is part of the PCRE2 API.
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
#include "pcre2_internal.h"
-#define DFTABLES /* pcre2_maketables.c notices this */
+#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
#include "pcre2_maketables.c"
+
+static char *classlist[] =
+ {
+ "space", "xdigit", "digit", "upper", "lower",
+ "word", "graph", "print", "punct", "cntrl"
+ };
+
+
+
+/*************************************************
+* Usage *
+*************************************************/
+
+static void
+usage(void)
+{
+(void)fprintf(stderr,
+ "Usage: pcre2_dftables [options] <output file>\n"
+ " -b Write output in binary (default is source code)\n"
+ " -L Use locale from LC_ALL (default is \"C\" locale)\n"
+ );
+}
+
+
+
+/*************************************************
+* Entry point *
+*************************************************/
+
int main(int argc, char **argv)
{
FILE *f;
-int i = 1;
+int i;
+int nclass = 0;
+BOOL binary = FALSE;
+char *env = "C";
const unsigned char *tables;
const unsigned char *base_of_tables;
-/* By default, the default C locale is used rather than what the building user
-happens to have set. However, if the -L option is given, set the locale from
-the LC_xxx environment variables. */
+/* Process options */
-if (argc > 1 && strcmp(argv[1], "-L") == 0)
+for (i = 1; i < argc; i++)
{
- setlocale(LC_ALL, ""); /* Set from environment variables */
- i++;
- }
+ unsigned char *arg = (unsigned char *)argv[i];
+ if (*arg != '-') break;
+
+ if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
+ {
+ usage();
+ return 0;
+ }
+
+ else if (strcmp(arg, "-L") == 0)
+ {
+ if (setlocale(LC_ALL, "") == NULL)
+ {
+ (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
+ return 1;
+ }
+ env = getenv("LC_ALL");
+ }
+
+ else if (strcmp(arg, "-b") == 0)
+ binary = TRUE;
+
+ else
+ {
+ (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
+ return 1;
+ }
+ }
-if (argc < i + 1)
+if (i != argc - 1)
{
- fprintf(stderr, "dftables: one filename argument is required\n");
+ (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
return 1;
}
+
+/* Make the tables */
tables = maketables();
base_of_tables = tables;
@@ -88,41 +148,64 @@ base_of_tables = tables;
f = fopen(argv[i], "wb");
if (f == NULL)
{
- fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
+ fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
return 1;
}
+
+/* If -b was specified, we write the tables in binary. */
-/* There are several fprintf() calls here, because gcc in pedantic mode
-complains about the very long string otherwise. */
+if (binary)
+ {
+ int yield = 0;
+ size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
+ if (len != TABLES_LENGTH)
+ {
+ (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
+ "instead of %d\n", (int)len, TABLES_LENGTH);
+ yield = 1;
+ }
+ fclose(f);
+ free((void *)base_of_tables);
+ return yield;
+ }
-fprintf(f,
+/* Write the tables as source code for inclusion in the PCRE2 library. There
+are several fprintf() calls here, because gcc in pedantic mode complains about
+the very long string otherwise. */
+
+(void)fprintf(f,
"/*************************************************\n"
"* Perl-Compatible Regular Expressions *\n"
"*************************************************/\n\n"
- "/* This file was automatically written by the dftables auxiliary\n"
+ "/* This file was automatically written by the pcre2_dftables auxiliary\n"
"program. It contains character tables that are used when no external\n"
"tables are passed to PCRE2 by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256. */\n\n");
+
+(void)fprintf(f,
+ "/* This set of tables was written in the %s locale. */\n\n", env);
-fprintf(f,
- "/*The dftables program (which is distributed with PCRE2) can be used to\n"
- "build alternative versions of this file. This is necessary if you are\n"
+(void)fprintf(f,
+ "/* The pcre2_dftables program (which is distributed with PCRE2) can be used\n"
+ "to build alternative versions of this file. This is necessary if you are\n"
"running in an EBCDIC environment, or if you want to default to a different\n"
- "encoding, for example ISO-8859-1. When dftables is run, it creates these\n"
- "tables in the current locale. This happens automatically if PCRE2 is\n"
- "configured with --enable-rebuild-chartables. */\n\n");
+ "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
+ "these tables in the \"C\" locale by default. This happens automatically if\n"
+ "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
+ "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
+ "locale. */\n\n");
/* Force config.h in z/OS */
#if defined NATIVE_ZOS
-fprintf(f,
+(void)fprintf(f,
"/* For z/OS, config.h is forced */\n"
"#ifndef HAVE_CONFIG_H\n"
"#define HAVE_CONFIG_H 1\n"
"#endif\n\n");
#endif
-fprintf(f,
+(void)fprintf(f,
"/* The following #include is present because without it gcc 4.x may remove\n"
"the array definition from the final binary if PCRE2 is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
@@ -130,56 +213,57 @@ fprintf(f,
"outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n");
-fprintf(f,
+(void)fprintf(f,
"#ifdef HAVE_CONFIG_H\n"
"#include \"config.h\"\n"
"#endif\n\n"
"#include \"pcre2_internal.h\"\n\n");
-fprintf(f,
+(void)fprintf(f,
"const uint8_t PRIV(default_tables)[] = {\n\n"
"/* This table is a lower casing table. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f, "/* This table is a case flipping table. */\n\n");
+(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f,
+(void)fprintf(f,
"/* This table contains bit maps for various character classes. Each map is 32\n"
"bytes long and the bits run from the least significant end of each byte. The\n"
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
- "graph print, punct, and cntrl. Other classes are built from combinations. */\n\n");
+ "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < cbit_length; i++)
{
if ((i & 7) == 0 && i != 0)
{
- if ((i & 31) == 0) fprintf(f, "\n");
- fprintf(f, "\n ");
+ if ((i & 31) == 0) (void)fprintf(f, "\n");
+ if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
+ (void)fprintf(f, "\n ");
}
- fprintf(f, "0x%02x", *tables++);
- if (i != cbit_length - 1) fprintf(f, ",");
+ (void)fprintf(f, "0x%02x", *tables++);
+ if (i != cbit_length - 1) (void)fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f,
+(void)fprintf(f,
"/* This table identifies various classes of character by individual bits:\n"
" 0x%02x white space character\n"
" 0x%02x letter\n"
@@ -188,32 +272,32 @@ fprintf(f,
" 0x%02x alphanumeric or '_'\n*/\n\n",
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0)
{
- fprintf(f, " /* ");
- if (isprint(i-8)) fprintf(f, " %c -", i-8);
- else fprintf(f, "%3d-", i-8);
- if (isprint(i-1)) fprintf(f, " %c ", i-1);
- else fprintf(f, "%3d", i-1);
- fprintf(f, " */\n ");
+ (void)fprintf(f, " /* ");
+ if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
+ else (void)fprintf(f, "%3d-", i-8);
+ if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
+ else (void)fprintf(f, "%3d", i-1);
+ (void)fprintf(f, " */\n ");
}
- fprintf(f, "0x%02x", *tables++);
- if (i != 255) fprintf(f, ",");
+ (void)fprintf(f, "0x%02x", *tables++);
+ if (i != 255) (void)fprintf(f, ",");
}
-fprintf(f, "};/* ");
-if (isprint(i-8)) fprintf(f, " %c -", i-8);
- else fprintf(f, "%3d-", i-8);
-if (isprint(i-1)) fprintf(f, " %c ", i-1);
- else fprintf(f, "%3d", i-1);
-fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
+(void)fprintf(f, "};/* ");
+if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
+ else (void)fprintf(f, "%3d-", i-8);
+if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
+ else (void)fprintf(f, "%3d", i-1);
+(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
fclose(f);
free((void *)base_of_tables);
return 0;
}
-/* End of dftables.c */
+/* End of pcre2_dftables.c */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index 9963d6f..cb81199 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -579,7 +579,7 @@ total length of the tables. */
#define fcc_offset 256 /* Flip case */
#define cbits_offset 512 /* Character classes */
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
-#define tables_length (ctypes_offset + 256)
+#define TABLES_LENGTH (ctypes_offset + 256)
/* -------------------- Character and string names ------------------------ */
diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c
index 8c93b4b..56d2494 100644
--- a/src/pcre2_maketables.c
+++ b/src/pcre2_maketables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
-own as part of the PCRE2 library. However, it is also included in the
-compilation of dftables.c, in which case the macro DFTABLES is defined. */
+own as part of the PCRE2 library. It is also included in the compilation of
+pcre2_dftables.c as a freestanding program, in which case the macro
+PCRE2_DFTABLES is defined. */
-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES /* Compiling the library */
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
@@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
-supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
-program) malloc() is used, and the function has a different name so as not to
-clash with the prototype in pcre2.h.
+supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
+freestanding auxiliary program) malloc() is used, and the function has a
+different name so as not to clash with the prototype in pcre2.h.
-Arguments: none when DFTABLES is defined
- else a PCRE2 general context or NULL
+Arguments: none when PCRE2_DFTABLES is defined
+ else a PCRE2 general context or NULL
Returns: pointer to the contiguous block of data
+ else NULL if memory allocation failed
*/
-#ifdef DFTABLES /* Included in freestanding dftables.c program */
+#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
static const uint8_t *maketables(void)
{
-uint8_t *yield = (uint8_t *)malloc(tables_length);
+uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
-#else /* Not DFTABLES, compiling the library */
+#else /* Not PCRE2_DFTABLES, that is, compiling the library */
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
pcre2_maketables(pcre2_general_context *gcontext)
{
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
- gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
- malloc(tables_length));
-#endif /* DFTABLES */
+ gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
+ malloc(TABLES_LENGTH));
+#endif /* PCRE2_DFTABLES */
int i;
uint8_t *p;
@@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
-From release 8.34 is is also correct for Perl space, because Perl added VT at
-release 5.18.
+From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
+space, because Perl added VT at release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
@@ -114,24 +116,24 @@ test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
- if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
- if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
- if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
- if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
- if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
- if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
- if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
- if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
- if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
- if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
- if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
+ if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
+ if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
+ if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
+ if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
+ if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
+ if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
+ if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
+ if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
+ if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
+ if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
+ if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
-comments within regexes. However, Perl changed at release 5.18, so PCRE changed
-at release 8.34. */
+comments within regexes. However, Perl changed at release 5.18, so PCRE1
+changed at release 8.34 and it's always been this way for PCRE2. */
for (i = 0; i < 256; i++)
{
@@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
return yield;
}
-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES /* Compiling the library */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
{
diff --git a/src/pcre2_serialize.c b/src/pcre2_serialize.c
index cec1a03..ba17a26 100644
--- a/src/pcre2_serialize.c
+++ b/src/pcre2_serialize.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
/* Compute total size. */
-total_size = sizeof(pcre2_serialized_data) + tables_length;
+total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
tables = NULL;
for (i = 0; i < number_of_codes; i++)
@@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;
/* Copy all compiled code data. */
dst_bytes = bytes + sizeof(pcre2_serialized_data);
-memcpy(dst_bytes, tables, tables_length);
-dst_bytes += tables_length;
+memcpy(dst_bytes, tables, TABLES_LENGTH);
+dst_bytes += TABLES_LENGTH;
for (i = 0; i < number_of_codes; i++)
{
@@ -189,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
/* Decode tables. The reference count for the tables is stored immediately
following them. */
-tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
+tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
-memcpy(tables, src_bytes, tables_length);
-*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
-src_bytes += tables_length;
+memcpy(tables, src_bytes, TABLES_LENGTH);
+*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
+src_bytes += TABLES_LENGTH;
/* Decode the byte stream. We must not try to read the size from the compiled
code block in the stream, because it might be unaligned, which causes errors on
diff --git a/src/pcre2test.c b/src/pcre2test.c
index fdd0724..4657449 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -389,12 +389,14 @@ typedef struct cmdstruct {
int value;
} cmdstruct;
-enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
- CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
+enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
+ CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
+ CMD_UNKNOWN };
static cmdstruct cmdlist[] = {
{ "forbid_utf", CMD_FORBID_UTF },
{ "load", CMD_LOAD },
+ { "loadtables", CMD_LOADTABLES },
{ "newline_default", CMD_NEWLINE_DEFAULT },
{ "pattern", CMD_PATTERN },
{ "perltest", CMD_PERLTEST },
@@ -957,6 +959,8 @@ static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t locale_name[32];
+static uint8_t *tables3 = NULL; /* For binary-loaded tables */
+static uint32_t loadtables_length = 0;
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
@@ -4795,12 +4799,13 @@ Arguments:
buffptr point after the #command
mode open mode
fptr points to the FILE variable
+ name name of # command
Returns: PR_OK or PR_ABEND
*/
static int
-open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
+open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
{
char *endf;
char *filename = (char *)buffptr;
@@ -4810,7 +4815,7 @@ while (endf > filename && isspace(endf[-1])) endf--;
if (endf == filename)
{
- fprintf(outfile, "** File name expected after #save\n");
+ fprintf(outfile, "** File name expected after %s\n", name);
return PR_ABEND;
}
@@ -4976,7 +4981,7 @@ switch(cmd)
return PR_OK;
}
- rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
+ rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
if (rc != PR_OK) return rc;
PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
@@ -5015,7 +5020,7 @@ switch(cmd)
/* Load a set of compiled patterns from a file onto the stack */
case CMD_LOAD:
- rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
+ rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
if (rc != PR_OK) return rc;
serial_size = 0;
@@ -5067,6 +5072,31 @@ switch(cmd)
free(serial);
break;
+
+ /* Load a set of binary tables into tables3. */
+
+ case CMD_LOADTABLES:
+ rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
+ if (rc != PR_OK) return rc;
+
+ if (tables3 == NULL)
+ {
+ (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
+ tables3 = malloc(loadtables_length);
+ if (tables3 == NULL)
+ {
+ fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
+ return PR_ABEND;
+ }
+ }
+
+ if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
+ {
+ fprintf(outfile, "** Wrong return from fread()\n");
+ yield = PR_ABEND;
+ }
+ fclose(f);
+ break;
}
return yield;
@@ -5382,8 +5412,19 @@ else switch (pat_patctl.tables_id)
case 0: use_tables = NULL; break;
case 1: use_tables = tables1; break;
case 2: use_tables = tables2; break;
+
+ case 3:
+ if (tables3 == NULL)
+ {
+ fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
+ "been loaded\n");
+ return PR_SKIP;
+ }
+ use_tables = tables3;
+ break;
+
default:
- fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
+ fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
return PR_SKIP;
}
@@ -9112,6 +9153,7 @@ free(dbuffer);
free(pbuffer8);
free(dfa_workspace);
free((void *)locale_tables);
+free(tables3);
PCRE2_MATCH_DATA_FREE(match_data);
SUB1(pcre2_code_free, compiled_code);