diff options
Diffstat (limited to 'doc/pcre2demo.3')
-rw-r--r-- | doc/pcre2demo.3 | 56 |
1 file changed, 34 insertions, 22 deletions
diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3 index 5deed0a..c02dcd9 100644 --- a/doc/pcre2demo.3 +++ b/doc/pcre2demo.3 @@ -20,28 +20,31 @@ *************************************************/ /* This is a demonstration program to illustrate a straightforward way of -calling the PCRE2 regular expression library from a C program. See the +using the PCRE2 regular expression library from a C program. See the pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. There are actually three libraries, each supporting a different code unit -width. This demonstration program uses the 8-bit library. +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. In Unix-like environments, if PCRE2 is installed in your standard system libraries, you should be able to compile this program using this command: -gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo If PCRE2 is not installed in a standard place, it is likely to be installed with support for the pkg-config mechanism. 
If you have pkg-config, you can compile this program using this command: -gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo -If you do not have pkg-config, you may have to use this: +If you do not have pkg-config, you may have to use something like this: -gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e -R/usr/local/lib -lpcre2-8 -o pcre2demo Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and @@ -56,9 +59,14 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names -such as pcre2_compile(). */ +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. */ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -79,19 +87,19 @@ int main(int argc, char **argv) { pcre2_code *re; PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ -PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). 
*/ PCRE2_SPTR name_table; int crlf_is_newline; int errornumber; int find_all; int i; -int namecount; -int name_entry_size; int rc; int utf8; uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; uint32_t newline; PCRE2_SIZE erroroffset; @@ -106,15 +114,19 @@ pcre2_match_data *match_data; * First, sort out the command line. There is only one possible option at * * the moment, "-g" to request repeated matching to find all occurrences, * * like Perl's /g option. We set the variable find_all to a non-zero value * -* if the -g option is present. Apart from that, there must be exactly two * -* arguments. * +* if the -g option is present. * **************************************************************************/ find_all = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-g") == 0) find_all = 1; - else break; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\en", argv[i]); + return 1; + } + else break; } /* After the options, we require exactly two arguments, which are the pattern, @@ -122,7 +134,7 @@ and the subject string. */ if (argc - i != 2) { - printf("Two arguments required: a regex and a subject string\en"); + printf("Exactly two arguments required: a regex and a subject string\en"); return 1; } @@ -201,7 +213,7 @@ if (rc < 0) stored. */ ovector = pcre2_get_ovector_pointer(match_data); -printf("\enMatch succeeded at offset %d\en", (int)ovector[0]); +printf("Match succeeded at offset %d\en", (int)ovector[0]); /************************************************************************* @@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. 
*/ PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ &namecount); /* where to put the answer */ -if (namecount <= 0) printf("No named substrings\en"); else +if (namecount == 0) printf("No named substrings\en"); else { PCRE2_SPTR tabptr; printf("Named substrings\en"); @@ -330,8 +342,8 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY || for (;;) { - uint32_t options = 0; /* Normally no options */ - PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ /* If the previous match was for an empty string, we are finished if we are at the end of the subject. Otherwise, arrange to run another match at the @@ -371,7 +383,7 @@ for (;;) { if (options == 0) break; /* All matches found */ ovector[1] = start_offset + 1; /* Advance one code unit */ - if (crlf_is_newline && /* If CRLF is newline & */ + if (crlf_is_newline && /* If CRLF is a newline & */ start_offset < subject_length - 1 && /* we are at CRLF, */ subject[start_offset] == '\er' && subject[start_offset + 1] == '\en') @@ -417,7 +429,7 @@ for (;;) printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); } - if (namecount <= 0) printf("No named substrings\en"); else + if (namecount == 0) printf("No named substrings\en"); else { PCRE2_SPTR tabptr = name_table; printf("Named substrings\en"); |