summaryrefslogtreecommitdiff
path: root/pcregrep.c
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-08-15 16:45:36 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2012-08-15 16:45:36 +0000
commite5aae0d579e12d0eb2e22a6e2fcb0042ba2be0fd (patch)
tree50e19e340c02d2ac5e02337445ac14b71b6f8d87 /pcregrep.c
parentdb525ce55e5979dfb31209e0019d171216b19545 (diff)
downloadpcre-e5aae0d579e12d0eb2e22a6e2fcb0042ba2be0fd.tar.gz
Fix a number of issues in pcregrep.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1003 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcregrep.c')
-rw-r--r--pcregrep.c747
1 files changed, 474 insertions, 273 deletions
diff --git a/pcregrep.c b/pcregrep.c
index ee4d6b8..6ac870e 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -70,15 +70,16 @@ POSSIBILITY OF SUCH DAMAGE.
typedef int BOOL;
-#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99
#if BUFSIZ > 8192
-#define PATBUFSIZE BUFSIZ
+#define MAXPATLEN BUFSIZ
#else
-#define PATBUFSIZE 8192
+#define MAXPATLEN 8192
#endif
+#define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
+
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */
@@ -139,40 +140,31 @@ static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;
static char *dee_option = NULL;
static char *DEE_option = NULL;
+static char *locale = NULL;
static char *main_buffer = NULL;
static char *newline = NULL;
-static char *pattern_filename = NULL;
static char *stdin_name = (char *)"(standard input)";
-static char *locale = NULL;
static const unsigned char *pcretables = NULL;
-static int pattern_count = 0;
-static pcre **pattern_list = NULL;
-static pcre_extra **hints_list = NULL;
-
-static char *file_list = NULL;
-static char *include_pattern = NULL;
-static char *exclude_pattern = NULL;
-static char *include_dir_pattern = NULL;
-static char *exclude_dir_pattern = NULL;
-
-static pcre *include_compiled = NULL;
-static pcre *exclude_compiled = NULL;
-static pcre *include_dir_compiled = NULL;
-static pcre *exclude_dir_compiled = NULL;
-
static int after_context = 0;
static int before_context = 0;
static int binary_files = BIN_BINARY;
static int both_context = 0;
static int bufthird = PCREGREP_BUFSIZE;
static int bufsize = 3*PCREGREP_BUFSIZE;
+
+#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
+static int dee_action = dee_SKIP;
+#else
static int dee_action = dee_READ;
+#endif
+
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
static int only_matching = -1;
+static int pcre_options = 0;
static int process_options = 0;
#ifdef SUPPORT_PCREGREP_JIT
@@ -199,10 +191,80 @@ static BOOL quiet = FALSE;
static BOOL silent = FALSE;
static BOOL utf8 = FALSE;
+/* Structure for list of file names (for -f and --{in,ex}clude-from) */
+
+typedef struct fnstr {
+ struct fnstr *next;
+ char *name;
+} fnstr;
+
+static fnstr *exclude_from = NULL;
+static fnstr *exclude_from_last = NULL;
+static fnstr *include_from = NULL;
+static fnstr *include_from_last = NULL;
+
+static fnstr *file_lists = NULL;
+static fnstr *file_lists_last = NULL;
+static fnstr *pattern_files = NULL;
+static fnstr *pattern_files_last = NULL;
+
+/* Structure for holding the two variables that describe a file name chain. */
+
+typedef struct fndatastr {
+ fnstr **anchor;
+ fnstr **lastptr;
+} fndatastr;
+
+static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
+static fndatastr include_from_data = { &include_from, &include_from_last };
+static fndatastr file_lists_data = { &file_lists, &file_lists_last };
+static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
+
+/* Structure for pattern and its compiled form; used for matching patterns and
+also for include/exclude patterns. */
+
+typedef struct patstr {
+ struct patstr *next;
+ char *string;
+ pcre *compiled;
+ pcre_extra *hint;
+} patstr;
+
+static patstr *patterns = NULL;
+static patstr *patterns_last = NULL;
+static patstr *include_patterns = NULL;
+static patstr *include_patterns_last = NULL;
+static patstr *exclude_patterns = NULL;
+static patstr *exclude_patterns_last = NULL;
+static patstr *include_dir_patterns = NULL;
+static patstr *include_dir_patterns_last = NULL;
+static patstr *exclude_dir_patterns = NULL;
+static patstr *exclude_dir_patterns_last = NULL;
+
+/* Structure holding the two variables that describe a pattern chain. A pointer
+to such structures is used for each appropriate option. */
+
+typedef struct patdatastr {
+ patstr **anchor;
+ patstr **lastptr;
+} patdatastr;
+
+static patdatastr match_patdata = { &patterns, &patterns_last };
+static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
+static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
+static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
+static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
+
+static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
+ &include_dir_patterns, &exclude_dir_patterns };
+
+static const char *incexname[4] = { "--include", "--exclude",
+ "--include-dir", "--exclude-dir" };
+
/* Structure for options and list of them */
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
- OP_OP_NUMBER, OP_PATLIST, OP_BINFILES };
+ OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
typedef struct option_item {
int type;
@@ -233,6 +295,8 @@ used to identify them. */
#define N_NOJIT (-16)
#define N_FILE_LIST (-17)
#define N_BINARY_FILES (-18)
+#define N_EXCLUDE_FROM (-19)
+#define N_INCLUDE_FROM (-20)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
@@ -248,10 +312,10 @@ static option_item optionlist[] = {
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
{ OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
{ OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
- { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
+ { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
{ OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
- { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
- { OP_STRING, N_FILE_LIST, &file_list, "file-list=path","read files to search from file" },
+ { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
+ { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
@@ -276,18 +340,20 @@ static option_item optionlist[] = {
{ OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
- { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
- { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
- { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
- { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
+ { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
+ { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
+ { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
+ { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
+ { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
+ { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
/* These two were accidentally implemented with underscores instead of
hyphens in the option names. As this was not discovered for several releases,
the incorrect versions are left in the table for compatibility. However, the
--help function misses out any option that has an underscore in its name. */
- { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
- { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
+ { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
+ { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
#ifdef JFRIEDL_DEBUG
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
@@ -304,7 +370,9 @@ static option_item optionlist[] = {
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
-can treat them as the same. */
+can treat them as the same. Note that the MAXPATLEN macro assumes the longest
+prefix+suffix is 10 characters; if anything longer is added, it must be
+adjusted. */
static const char *prefix[] = {
"", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
@@ -325,6 +393,94 @@ const char utf8_table4[] = {
/*************************************************
+* Add item to chain of patterns *
+*************************************************/
+
+/* Used to add an item onto a chain, or just return an unconnected item if the
+"after" argument is NULL.
+
+Arguments:
+ s pattern string to add
+ after if not NULL points to item to insert after
+
+Returns: new pattern block, or NULL after malloc failure
+*/
+
+static patstr *
+add_pattern(char *s, patstr *after)
+{
+patstr *p = (patstr *)malloc(sizeof(patstr));
+if (p == NULL)
+ {
+ fprintf(stderr, "pcregrep: malloc failed\n");
+ return NULL;
+ }
+if (strlen(s) > MAXPATLEN)
+ {
+ fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
+ MAXPATLEN);
+ return NULL;
+ }
+p->next = NULL;
+p->string = s;
+p->compiled = NULL;
+p->hint = NULL;
+
+if (after != NULL)
+ {
+ p->next = after->next;
+ after->next = p;
+ }
+return p;
+}
+
+
+/*************************************************
+* Free chain of patterns *
+*************************************************/
+
+/* Used for several chains of patterns.
+
+Argument: pointer to start of chain
+Returns: nothing
+*/
+
+static void
+free_pattern_chain(patstr *pc)
+{
+while (pc != NULL)
+ {
+ patstr *p = pc;
+ pc = p->next;
+ if (p->hint != NULL) pcre_free_study(p->hint);
+ if (p->compiled != NULL) pcre_free(p->compiled);
+ free(p);
+ }
+}
+
+
+/*************************************************
+* Free chain of file names *
+*************************************************/
+
+/*
+Argument: pointer to start of chain
+Returns: nothing
+*/
+
+static void
+free_file_chain(fnstr *fn)
+{
+while (fn != NULL)
+ {
+ fnstr *f = fn;
+ fn = f->next;
+ free(f);
+ }
+}
+
+
+/*************************************************
* Exit from the program *
*************************************************/
@@ -365,6 +521,7 @@ although at present the only ones are for Unix, Win32, and for "no support". */
#include <dirent.h>
typedef DIR directory_type;
+#define FILESEP '/'
static int
isdirectory(char *filename)
@@ -372,7 +529,7 @@ isdirectory(char *filename)
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
return 0; /* In the expectation that opening as a file will fail */
-return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
+return (statbuf.st_mode & S_IFMT) == S_IFDIR;
}
static directory_type *
@@ -459,13 +616,15 @@ BOOL first;
WIN32_FIND_DATA data;
} directory_type;
+#DEFINE FILESEP '/'
+
int
isdirectory(char *filename)
{
DWORD attr = GetFileAttributes(filename);
if (attr == INVALID_FILE_ATTRIBUTES)
return 0;
-return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
+return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
}
directory_type *
@@ -476,8 +635,8 @@ char *pattern;
directory_type *dir;
DWORD err;
len = strlen(filename);
-pattern = (char *) malloc(len + 3);
-dir = (directory_type *) malloc(sizeof(*dir));
+pattern = (char *)malloc(len + 3);
+dir = (directory_type *)malloc(sizeof(*dir));
if ((pattern == NULL) || (dir == NULL))
{
fprintf(stderr, "pcregrep: malloc failed\n");
@@ -563,6 +722,7 @@ return FALSE;
#else
+#DEFINE FILESEP 0;
typedef void directory_type;
int isdirectory(char *filename) { return 0; }
@@ -619,6 +779,45 @@ return sys_errlist[n];
/*************************************************
+* Test exclude/includes *
+*************************************************/
+
+/* If any exclude pattern matches, the path is excluded. Otherwise, unless
+there are no includes, the path must match an include pattern.
+
+Arguments:
+ path the path to be matched
+ ip the chain of include patterns
+ ep the chain of exclude patterns
+
+Returns: TRUE if the path is not excluded
+*/
+
+static BOOL
+test_incexc(char *path, patstr *ip, patstr *ep)
+{
+int plen = strlen(path);
+
+for (; ep != NULL; ep = ep->next)
+ {
+ if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
+ return FALSE;
+ }
+
+if (ip == NULL) return TRUE;
+
+for (; ip != NULL; ip = ip->next)
+ {
+ if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
+ return TRUE;
+ }
+
+return FALSE;
+}
+
+
+
+/*************************************************
* Read one line of input *
*************************************************/
@@ -935,8 +1134,9 @@ Arguments:
Returns: nothing
*/
-static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
- char *endptr, char *printname)
+static void
+do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
+ char *printname)
{
if (after_context > 0 && lastmatchnumber > 0)
{
@@ -983,20 +1183,22 @@ match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
{
int i;
size_t slen = length;
+patstr *p = patterns;
const char *msg = "this text:\n\n";
+
if (slen > 200)
{
slen = 200;
msg = "text that starts:\n\n";
}
-for (i = 0; i < pattern_count; i++)
+for (i = 1; p != NULL; p = p->next, i++)
{
- *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
+ *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
if (*mrc >= 0) return TRUE;
if (*mrc == PCRE_ERROR_NOMATCH) continue;
fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
- if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
+ if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
fprintf(stderr, "%s", msg);
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
fprintf(stderr, "\n\n");
@@ -1191,7 +1393,7 @@ while (ptr < endptr)
for (i = 0; i < jfriedl_XR; i++)
- match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
+ match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
if (gettimeofday(&end_time, &dummy) != 0)
@@ -1594,7 +1796,8 @@ Arguments:
dir_recurse TRUE if recursing is wanted (-r or -drecurse)
only_one_at_top TRUE if the path is the only one at toplevel
-Returns: 0 if there was at least one match
+Returns: -1 the file/directory was skipped
+ 0 if there was at least one match
1 if there were no matches
2 there was some kind of error
@@ -1605,9 +1808,9 @@ static int
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
{
int rc = 1;
-int sep;
int frtype;
void *handle;
+char *lastcomp;
FILE *in = NULL; /* Ensure initialized */
#ifdef SUPPORT_LIBZ
@@ -1631,14 +1834,23 @@ if (strcmp(pathname, "-") == 0)
stdin_name : NULL);
}
-/* If the file is a directory, skip if skipping or if we are recursing, scan
-each file and directory within it, subject to any include or exclude patterns
-that were set. The scanning code is localized so it can be made
-system-specific. */
+/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
+directories, whereas --include and --exclude apply to everything else. The test
+is against the final component of the path. */
+
+lastcomp = strrchr(pathname, FILESEP);
+lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
+
+/* If the file is a directory, skip if not recursing or if explicitly excluded.
+Otherwise, scan the directory and recurse for each path within it. The scanning
+code is localized so it can be made system-specific. */
-if ((sep = isdirectory(pathname)) != 0)
+if (isdirectory(pathname))
{
- if (dee_action == dee_SKIP) return 1;
+ if (dee_action == dee_SKIP ||
+ !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
+ return -1;
+
if (dee_action == dee_RECURSE)
{
char buffer[1024];
@@ -1655,31 +1867,8 @@ if ((sep = isdirectory(pathname)) != 0)
while ((nextfile = readdirectory(dir)) != NULL)
{
- int frc, nflen;
- sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
- nflen = (int)(strlen(nextfile));
-
- if (isdirectory(buffer))
- {
- if (exclude_dir_compiled != NULL &&
- pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
- continue;
-
- if (include_dir_compiled != NULL &&
- pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
- continue;
- }
- else
- {
- if (exclude_compiled != NULL &&
- pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
- continue;
-
- if (include_compiled != NULL &&
- pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
- continue;
- }
-
+ int frc;
+ sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
frc = grep_or_recurse(buffer, dir_recurse, FALSE);
if (frc > 1) rc = frc;
else if (frc == 0 && rc == 1) rc = 0;
@@ -1691,9 +1880,11 @@ if ((sep = isdirectory(pathname)) != 0)
}
/* If the file is not a directory and not a regular file, skip it if that's
-been requested. */
+been requested. Otherwise, check for explicit include/exclude. */
-else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
+else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
+ !test_incexc(lastcomp, include_patterns, exclude_patterns))
+ return -1;
/* Control reaches here if we have a regular file, or if we have a directory
and recursion or skipping was not requested, or if we have anything else and
@@ -1894,8 +2085,7 @@ printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
printf("When reading patterns or file names from a file, trailing white\n");
printf("space is removed and blank lines are ignored.\n");
-printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
- MAX_PATTERN_COUNT, PATBUFSIZE);
+printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
@@ -1939,7 +2129,7 @@ switch(letter)
case 'x': process_options |= PO_LINE_MATCH; break;
case 'V':
- fprintf(stderr, "pcregrep version %s\n", pcre_version());
+ fprintf(stdout, "pcregrep version %s\n", pcre_version());
pcregrep_exit(0);
break;
@@ -1983,13 +2173,20 @@ return buffer;
* Compile a single pattern *
*************************************************/
-/* When the -F option has been used, this is called for each substring.
-Otherwise it's called for each supplied pattern.
+/* Do nothing if the pattern has already been compiled. This is the case for
+include/exclude patterns read from a file.
+
+When the -F option has been used, each "pattern" may be a list of strings,
+separated by line breaks. They will be matched literally. We split such a
+string and compile the first substring, inserting an additional block into the
+pattern chain.
Arguments:
- pattern the pattern string
+ p points to the pattern block
options the PCRE options
- filename the file name, or NULL for a command-line pattern
+ popts the processing options
+ fromfile TRUE if the pattern was read from a file
+ fromtext file name or identifying text (e.g. "include")
count 0 if this is the only command line pattern, or
number of the command line pattern, or
linenumber for a pattern from a file
@@ -1998,47 +2195,52 @@ Returns: TRUE on success, FALSE after an error
*/
static BOOL
-compile_single_pattern(char *pattern, int options, char *filename, int count)
+compile_pattern(patstr *p, int options, int popts, int fromfile,
+ const char *fromtext, int count)
{
char buffer[PATBUFSIZE];
const char *error;
+char *ps = p->string;
+int patlen = strlen(ps);
int errptr;
-if (pattern_count >= MAX_PATTERN_COUNT)
- {
- fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
- (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
- return FALSE;
- }
+if (p->compiled != NULL) return TRUE;
-sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
- suffix[process_options]);
-pattern_list[pattern_count] =
- pcre_compile(buffer, options, &error, &errptr, pcretables);
-if (pattern_list[pattern_count] != NULL)
+if ((popts & PO_FIXED_STRINGS) != 0)
{
- pattern_count++;
- return TRUE;
+ int ellength;
+ char *eop = ps + patlen;
+ char *pe = end_of_line(ps, eop, &ellength);
+
+ if (ellength != 0)
+ {
+ if (add_pattern(pe, p) == NULL) return FALSE;
+ patlen = (int)(pe - ps - ellength);
+ }
}
+sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
+p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
+if (p->compiled != NULL) return TRUE;
+
/* Handle compile errors */
-errptr -= (int)strlen(prefix[process_options]);
-if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
+errptr -= (int)strlen(prefix[popts]);
+if (errptr > patlen) errptr = patlen;
-if (filename == NULL)
+if (fromfile)
{
- if (count == 0)
- fprintf(stderr, "pcregrep: Error in command-line regex "
- "at offset %d: %s\n", errptr, error);
- else
- fprintf(stderr, "pcregrep: Error in %s command-line regex "
- "at offset %d: %s\n", ordin(count), errptr, error);
+ fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
+ "at offset %d: %s\n", count, fromtext, errptr, error);
}
else
{
- fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
- "at offset %d: %s\n", count, filename, errptr, error);
+ if (count == 0)
+ fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
+ fromtext, errptr, error);
+ else
+ fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
+ ordin(count), fromtext, errptr, error);
}
return FALSE;
@@ -2047,43 +2249,79 @@ return FALSE;
/*************************************************
-* Compile one supplied pattern *
+* Read and compile a file of patterns *
*************************************************/
-/* When the -F option has been used, each string may be a list of strings,
-separated by line breaks. They will be matched literally.
+/* This is used for --filelist, --include-from, and --exclude-from.
Arguments:
- pattern the pattern string
- options the PCRE options
- filename the file name, or NULL for a command-line pattern
- count 0 if this is the only command line pattern, or
- number of the command line pattern, or
- linenumber for a pattern from a file
+ name the name of the file; "-" is stdin
+ patptr pointer to the pattern chain anchor
+ patlastptr pointer to the last pattern pointer
+ popts the process options to pass to pattern_compile()
-Returns: TRUE on success, FALSE after an error
+Returns: TRUE if all went well
*/
static BOOL
-compile_pattern(char *pattern, int options, char *filename, int count)
+read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
{
-if ((process_options & PO_FIXED_STRINGS) != 0)
+int linenumber = 0;
+FILE *f;
+char *filename;
+char buffer[PATBUFSIZE];
+
+if (strcmp(name, "-") == 0)
{
- char *eop = pattern + strlen(pattern);
- char buffer[PATBUFSIZE];
+ f = stdin;
+ filename = stdin_name;
+ }
+else
+ {
+ f = fopen(name, "r");
+ if (f == NULL)
+ {
+ fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
+ return FALSE;
+ }
+ filename = name;
+ }
+
+while (fgets(buffer, PATBUFSIZE, f) != NULL)
+ {
+ char *s = buffer + (int)strlen(buffer);
+ while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
+ *s = 0;
+ linenumber++;
+ if (buffer[0] == 0) continue; /* Skip blank lines */
+
+ /* Note: this call to add_pattern() puts a pointer to the local variable
+ "buffer" into the pattern chain. However, that pointer is used only when
+ compiling the pattern, which happens immediately below, so we flatten it
+ afterwards, as a precaution against any later code trying to use it. */
+
+ *patlastptr = add_pattern(buffer, *patlastptr);
+ if (*patlastptr == NULL) return FALSE;
+ if (*patptr == NULL) *patptr = *patlastptr;
+
+ /* This loop is needed because compiling a "pattern" when -F is set may add
+ on additional literal patterns if the original contains a newline. In the
+ common case, it never will, because fgets() stops at a newline. However,
+ the -N option can be used to give pcregrep a different newline setting. */
+
for(;;)
{
- int ellength;
- char *p = end_of_line(pattern, eop, &ellength);
- if (ellength == 0)
- return compile_single_pattern(pattern, options, filename, count);
- sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
- pattern = p;
- if (!compile_single_pattern(buffer, options, filename, count))
+ if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
+ linenumber))
return FALSE;
+ (*patlastptr)->string = NULL; /* Insurance */
+ if ((*patlastptr)->next == NULL) break;
+ *patlastptr = (*patlastptr)->next;
}
}
-else return compile_single_pattern(pattern, options, filename, count);
+
+if (f != stdin) fclose(f);
+return TRUE;
}
@@ -2099,12 +2337,9 @@ main(int argc, char **argv)
{
int i, j;
int rc = 1;
-int pcre_options = 0;
-int cmd_pattern_count = 0;
-int hint_count = 0;
-int errptr;
BOOL only_one_at_top;
-char *patterns[MAX_PATTERN_COUNT];
+patstr *cp;
+fnstr *fn;
const char *locale_from = "--locale";
const char *error;
@@ -2144,7 +2379,7 @@ for (i = 1; i < argc; i++)
if (argv[i][1] == 0)
{
- if (pattern_filename != NULL || pattern_count > 0) break;
+ if (pattern_files != NULL || patterns != NULL) break;
else pcregrep_exit(usage(2));
}
@@ -2354,18 +2589,37 @@ for (i = 1; i < argc; i++)
option_data = argv[++i];
}
- /* If the option type is OP_PATLIST, it's the -e option, which can be called
- multiple times to create a list of patterns. */
+ /* If the option type is OP_PATLIST, it's the -e option, or one of the
+ include/exclude options, which can be called multiple times to create lists
+ of patterns. */
if (op->type == OP_PATLIST)
+ {
+ patdatastr *pd = (patdatastr *)op->dataptr;
+ *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
+ if (*(pd->lastptr) == NULL) goto EXIT2;
+ if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
+ }
+
+ /* If the option type is OP_FILELIST, it's one of the options that names a
+ file. */
+
+ else if (op->type == OP_FILELIST)
{
- if (cmd_pattern_count >= MAX_PATTERN_COUNT)
+ fndatastr *fd = (fndatastr *)op->dataptr;
+ fn = (fnstr *)malloc(sizeof(fnstr));
+ if (fn == NULL)
{
- fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
- MAX_PATTERN_COUNT);
- return 2;
+ fprintf(stderr, "pcregrep: malloc failed\n");
+ goto EXIT2;
}
- patterns[cmd_pattern_count++] = option_data;
+ fn->next = NULL;
+ fn->name = option_data;
+ if (*(fd->anchor) == NULL)
+ *(fd->anchor) = fn;
+ else
+ (*(fd->lastptr))->next = fn;
+ *(fd->lastptr) = fn;
}
/* Handle OP_BINARY_FILES */
@@ -2580,77 +2834,47 @@ if (jfriedl_XT != 0 || jfriedl_XR != 0)
}
#endif
-/* Get memory for the main buffer, and to store the pattern and hints lists. */
+/* Get memory for the main buffer. */
bufsize = 3*bufthird;
main_buffer = (char *)malloc(bufsize);
-pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
-hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
-if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
+if (main_buffer == NULL)
{
fprintf(stderr, "pcregrep: malloc failed\n");
goto EXIT2;
}
-/* If no patterns were provided by -e, and there is no file provided by -f,
+/* If no patterns were provided by -e, and there are no files provided by -f,
the first argument is the one and only pattern, and it must exist. */
-if (cmd_pattern_count == 0 && pattern_filename == NULL)
+if (patterns == NULL && pattern_files == NULL)
{
if (i >= argc) return usage(2);
- patterns[cmd_pattern_count++] = argv[i++];
+ patterns = patterns_last = add_pattern(argv[i++], NULL);
+ if (patterns == NULL) goto EXIT2;
}
/* Compile the patterns that were provided on the command line, either by
-multiple uses of -e or as a single unkeyed pattern. */
+multiple uses of -e or as a single unkeyed pattern. We cannot do this until
+after all the command-line options are read so that we know which PCRE options
+to use. When -F is used, compile_pattern() may add another block into the
+chain, so we must not access the next pointer till after the compile. */
-for (j = 0; j < cmd_pattern_count; j++)
+for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
{
- if (!compile_pattern(patterns[j], pcre_options, NULL,
- (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
+ if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
+ (j == 1 && patterns->next == NULL)? 0 : j))
goto EXIT2;
}
-/* Compile the regular expressions that are provided in a file. */
+/* Read and compile the regular expressions that are provided in files. */
-if (pattern_filename != NULL)
+for (fn = pattern_files; fn != NULL; fn = fn->next)
{
- int linenumber = 0;
- FILE *f;
- char *filename;
- char buffer[PATBUFSIZE];
-
- if (strcmp(pattern_filename, "-") == 0)
- {
- f = stdin;
- filename = stdin_name;
- }
- else
- {
- f = fopen(pattern_filename, "r");
- if (f == NULL)
- {
- fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
- strerror(errno));
- goto EXIT2;
- }
- filename = pattern_filename;
- }
-
- while (fgets(buffer, PATBUFSIZE, f) != NULL)
- {
- char *s = buffer + (int)strlen(buffer);
- while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
- *s = 0;
- linenumber++;
- if (buffer[0] == 0) continue; /* Skip blank lines */
- if (!compile_pattern(buffer, pcre_options, filename, linenumber))
- goto EXIT2;
- }
-
- if (f != stdin) fclose(f);
- }
+ if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
+ goto EXIT2;
+ }
/* Study the regular expressions, as we will be running them many times. Unless
JIT has been explicitly disabled, arrange a stack for it to use. */
@@ -2660,20 +2884,19 @@ if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
#endif
-for (j = 0; j < pattern_count; j++)
+for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
{
- hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
+ cp->hint = pcre_study(cp->compiled, study_options, &error);
if (error != NULL)
{
char s[16];
- if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
+ if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
goto EXIT2;
}
- hint_count++;
#ifdef SUPPORT_PCREGREP_JIT
- if (jit_stack != NULL && hints_list[j] != NULL)
- pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
+ if (jit_stack != NULL && cp->hint != NULL)
+ pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
#endif
}
@@ -2682,12 +2905,12 @@ pcre_extra block for each pattern. */
if (match_limit > 0 || match_limit_recursion > 0)
{
- for (j = 0; j < pattern_count; j++)
+ for (cp = patterns; cp != NULL; cp = cp->next)
{
- if (hints_list[j] == NULL)
+ if (cp->hint == NULL)
{
- hints_list[j] = malloc(sizeof(pcre_extra));
- if (hints_list[j] == NULL)
+ cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
+ if (cp->hint == NULL)
{
fprintf(stderr, "pcregrep: malloc failed\n");
pcregrep_exit(2);
@@ -2695,81 +2918,69 @@ if (match_limit > 0 || match_limit_recursion > 0)
}
if (match_limit > 0)
{
- hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
- hints_list[j]->match_limit = match_limit;
+ cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
+ cp->hint->match_limit = match_limit;
}
if (match_limit_recursion > 0)
{
- hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- hints_list[j]->match_limit_recursion = match_limit_recursion;
+ cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ cp->hint->match_limit_recursion = match_limit_recursion;
}
}
}
-/* If there are include or exclude patterns, compile them. */
+/* If there are include or exclude patterns read from the command line, compile
+them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
+0. */
-if (exclude_pattern != NULL)
+for (j = 0; j < 4; j++)
{
- exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
- pcretables);
- if (exclude_compiled == NULL)
+ int k;
+ for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
{
- fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
- errptr, error);
- goto EXIT2;
+ if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
+ (k == 1 && cp->next == NULL)? 0 : k))
+ goto EXIT2;
}
}
-if (include_pattern != NULL)
+/* Read and compile include/exclude patterns from files. */
+
+for (fn = include_from; fn != NULL; fn = fn->next)
{
- include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
- pcretables);
- if (include_compiled == NULL)
- {
- fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
- errptr, error);
+ if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
goto EXIT2;
- }
}
-if (exclude_dir_pattern != NULL)
+for (fn = exclude_from; fn != NULL; fn = fn->next)
{
- exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
- pcretables);
- if (exclude_dir_compiled == NULL)
- {
- fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
- errptr, error);
+ if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
goto EXIT2;
- }
}
-if (include_dir_pattern != NULL)
+/* If there are no files that contain lists of files to search, and there are
+no file arguments, search stdin, and then exit. */
+
+if (file_lists == NULL && i >= argc)
{
- include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
- pcretables);
- if (include_dir_compiled == NULL)
- {
- fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
- errptr, error);
- goto EXIT2;
- }
+ rc = pcregrep(stdin, FR_PLAIN, stdin_name,
+ (filenames > FN_DEFAULT)? stdin_name : NULL);
+ goto EXIT;
}
-/* If a file that contains a list of files to search has been specified, read
-it line by line and search the given files. Otherwise, if there are no further
-arguments, do the business on stdin and exit. */
+/* If any files that contains a list of files to search have been specified,
+read them line by line and search the given files. */
-if (file_list != NULL)
+for (fn = file_lists; fn != NULL; fn = fn->next)
{
char buffer[PATBUFSIZE];
FILE *fl;
- if (strcmp(file_list, "-") == 0) fl = stdin; else
+ if (strcmp(fn->name, "-") == 0) fl = stdin; else
{
- fl = fopen(file_list, "rb");
+ fl = fopen(fn->name, "rb");
if (fl == NULL)
{
- fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
+ fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
strerror(errno));
goto EXIT2;
}
@@ -2787,24 +2998,14 @@ if (file_list != NULL)
else if (frc == 0 && rc == 1) rc = 0;
}
}
- if (fl != stdin) fclose (fl);
+ if (fl != stdin) fclose(fl);
}
-/* Do this only if there was no file list (and no file arguments). */
-
-else if (i >= argc)
- {
- rc = pcregrep(stdin, FR_PLAIN, stdin_name,
- (filenames > FN_DEFAULT)? stdin_name : NULL);
- goto EXIT;
- }
+/* After handling file-list, work through remaining arguments. Pass in the fact
+that there is only one argument at top level - this suppresses the file name if
+the argument is not a directory and filenames are not otherwise forced. */
-/* After handling file-list or if there are remaining arguments, work through
-them as files or directories. Pass in the fact that there is only one argument
-at top level - this suppresses the file name if the argument is not a directory
-and filenames are not otherwise forced. */
-
-only_one_at_top = i == argc - 1 && file_list == NULL;
+only_one_at_top = i == argc - 1 && file_lists == NULL;
for (; i < argc; i++)
{
@@ -2818,20 +3019,20 @@ EXIT:
#ifdef SUPPORT_PCREGREP_JIT
if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
#endif
+
if (main_buffer != NULL) free(main_buffer);
-if (pattern_list != NULL)
- {
- for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
- free(pattern_list);
- }
-if (hints_list != NULL)
- {
- for (i = 0; i < hint_count; i++)
- {
- if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
- }
- free(hints_list);
- }
+
+free_pattern_chain(patterns);
+free_pattern_chain(include_patterns);
+free_pattern_chain(include_dir_patterns);
+free_pattern_chain(exclude_patterns);
+free_pattern_chain(exclude_dir_patterns);
+
+free_file_chain(exclude_from);
+free_file_chain(include_from);
+free_file_chain(pattern_files);
+free_file_chain(file_lists);
+
pcregrep_exit(rc);
EXIT2: