diff options
author | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:39:42 +0000 |
---|---|---|
committer | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:39:42 +0000 |
commit | 93751f5e0bbc35cf20c27d1e17c070555671f4f1 (patch) | |
tree | 8f552f729ff6fee21de091d696e94e2890931c7d /pcregrep.c | |
parent | e87cad160ebb5eefbd74344fc0c78318432ae433 (diff) | |
download | pcre-93751f5e0bbc35cf20c27d1e17c070555671f4f1.tar.gz |
Load pcre-3.5 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@53 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcregrep.c')
-rw-r--r-- | pcregrep.c | 424 |
1 files changed, 368 insertions, 56 deletions
@@ -3,8 +3,9 @@ *************************************************/ /* This is a grep program that uses the PCRE regular expression library to do -its pattern matching. */ +its pattern matching. On a Unix system it can recurse into directories. */ +#include <ctype.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -17,22 +18,122 @@ its pattern matching. */ typedef int BOOL; +#define VERSION "2.0 01-Aug-2001" +#define MAX_PATTERN_COUNT 100 /************************************************* * Global variables * *************************************************/ -static pcre *pattern; -static pcre_extra *hints; +static char *pattern_filename = NULL; +static int pattern_count = 0; +static pcre **pattern_list; +static pcre_extra **hints_list; static BOOL count_only = FALSE; +static BOOL filenames = TRUE; static BOOL filenames_only = FALSE; static BOOL invert = FALSE; static BOOL number = FALSE; +static BOOL recurse = FALSE; static BOOL silent = FALSE; static BOOL whole_lines = FALSE; +/* Structure for options and list of them */ + +typedef struct option_item { + int one_char; + char *long_name; + char *help_text; +} option_item; + +static option_item optionlist[] = { + { -1, "help", "display this help and exit" }, + { 'c', "count", "print only a count of matching lines per FILE" }, + { 'h', "no-filename", "suppress the prefixing filename on output" }, + { 'i', "ignore-case", "ignore case distinctions" }, + { 'l', "files-with-matches", "print only FILE names containing matches" }, + { 'n', "line-number", "print line number with output lines" }, + { 'r', "recursive", "recursively scan sub-directories" }, + { 's', "no-messages", "suppress error messages" }, + { 'V', "version", "print version information and exit" }, + { 'v', "invert-match", "select non-matching lines" }, + { 'x', "line-regex", "force PATTERN to match only whole lines" }, + { 'x', "line-regexp", "force PATTERN to match only whole lines" }, + { 0, NULL, NULL } +}; + + +/************************************************* +* Functions for directory scanning * +*************************************************/ + +/* These functions are defined so that they can be made system specific, +although at present the only ones are for Unix, and for "no directory recursion +support". */ + + +/************* Directory scanning in Unix ***********/ + +#if IS_UNIX +#include <sys/types.h> +#include <sys/stat.h> +#include <dirent.h> + +typedef DIR directory_type; + +int +isdirectory(char *filename) +{ +struct stat statbuf; +if (stat(filename, &statbuf) < 0) + return 0; /* In the expectation that opening as a file will fail */ +return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0; +} + +directory_type * +opendirectory(char *filename) +{ +return opendir(filename); +} + +char * +readdirectory(directory_type *dir) +{ +for (;;) + { + struct dirent *dent = readdir(dir); + if (dent == NULL) return NULL; + if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) + return dent->d_name; + } +return NULL; /* Keep compiler happy; never executed */ +} + +void +closedirectory(directory_type *dir) +{ +closedir(dir); +} + + +#else + + +/************* Directory scanning when we can't do it ***********/ + +/* The type is void, and apart from isdirectory(), the functions do nothing. */ + +typedef void directory_type; + +int isdirectory(char *filename) { return FALSE; } +directory_type * opendirectory(char *filename) {} +char *readdirectory(directory_type *dir) {} +void closedirectory(directory_type *dir) {} + +#endif + #if ! HAVE_STRERROR @@ -72,13 +173,18 @@ char buffer[BUFSIZ]; while (fgets(buffer, sizeof(buffer), in) != NULL) { - BOOL match; + BOOL match = FALSE; + int i; int length = (int)strlen(buffer); if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; linenumber++; - match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0; - if (match && whole_lines && offsets[1] != length) match = FALSE; + for (i = 0; !match && i < pattern_count; i++) + { + match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0, + offsets, 99) >= 0; + if (match && whole_lines && offsets[1] != length) match = FALSE; + } if (match != invert) { @@ -116,13 +222,73 @@ return rc; /************************************************* +* Grep a file or recurse into a directory * +*************************************************/ + +static int +grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames, + BOOL only_one_at_top) +{ +int rc = 1; +int sep; +FILE *in; + +/* If the file is a directory and we are recursing, scan each file within it. +The scanning code is localized so it can be made system-specific. */ + +if ((sep = isdirectory(filename)) != 0 && recurse) + { + char buffer[1024]; + char *nextfile; + directory_type *dir = opendirectory(filename); + + if (dir == NULL) + { + fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename, + strerror(errno)); + return 2; + } + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile); + frc = grep_or_recurse(buffer, recurse, TRUE, FALSE); + if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } + +/* If the file is not a directory, or we are not recursing, scan it. If this is +the first and only argument at top level, we don't show the file name. +Otherwise, control is via the show_filenames variable. */ + +in = fopen(filename, "r"); +if (in == NULL) + { + fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno)); + return 2; + } + +rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL); +fclose(in); +return rc; +} + + + + +/************************************************* * Usage function * *************************************************/ static int usage(int rc) { -fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n"); +fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n"); +fprintf(stderr, "Type `pcregrep --help' for more information.\n"); return rc; } @@ -130,96 +296,242 @@ return rc; /************************************************* +* Help function * +*************************************************/ + +static void +help(void) +{ +option_item *op; + +printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n"); +printf("Search for PATTERN in each FILE or standard input.\n"); +printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); + +printf("Options:\n"); + +for (op = optionlist; op->one_char != 0; op++) + { + int n; + char s[4]; + if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); + printf(" %s --%s%n", s, op->long_name, &n); + n = 30 - n; + if (n < 1) n = 1; + printf("%.*s%s\n", n, " ", op->help_text); + } + +printf("\n -f<filename> or --file=<filename>\n"); +printf(" Read patterns from <filename> instead of using a command line option.\n"); +printf(" Trailing white space is removed; blanks lines are ignored.\n"); +printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); + +printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n"); +printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); +} + + + + +/************************************************* +* Handle an option * +*************************************************/ + +static int +handle_option(int letter, int options) +{ +switch(letter) + { + case -1: help(); exit(0); + case 'c': count_only = TRUE; break; + case 'h': filenames = FALSE; break; + case 'i': options |= PCRE_CASELESS; break; + case 'l': filenames_only = TRUE; + case 'n': number = TRUE; break; + case 'r': recurse = TRUE; break; + case 's': silent = TRUE; break; + case 'v': invert = TRUE; break; + case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; + + case 'V': + fprintf(stderr, "pcregrep version %s using ", VERSION); + fprintf(stderr, "PCRE version %s\n", pcre_version()); + exit(0); + break; + + default: + fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); + exit(usage(2)); + } + +return options; +} + + + + +/************************************************* * Main program * *************************************************/ int main(int argc, char **argv) { -int i; +int i, j; int rc = 1; int options = 0; int errptr; const char *error; -BOOL filenames = TRUE; +BOOL only_one_at_top; /* Process the options */ for (i = 1; i < argc; i++) { - char *s; if (argv[i][0] != '-') break; - s = argv[i] + 1; - while (*s != 0) + + /* Long name options */ + + if (argv[i][1] == '-') + { + option_item *op; + + if (strncmp(argv[i]+2, "file=", 5) == 0) + { + pattern_filename = argv[i] + 7; + continue; + } + + for (op = optionlist; op->one_char != 0; op++) + { + if (strcmp(argv[i]+2, op->long_name) == 0) + { + options = handle_option(op->one_char, options); + break; + } + } + if (op->one_char == 0) + { + fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); + exit(usage(2)); + } + } + + /* One-char options */ + + else { - switch (*s++) + char *s = argv[i] + 1; + while (*s != 0) { - case 'c': count_only = TRUE; break; - case 'h': filenames = FALSE; break; - case 'i': options |= PCRE_CASELESS; break; - case 'l': filenames_only = TRUE; - case 'n': number = TRUE; break; - case 's': silent = TRUE; break; - case 'v': invert = TRUE; break; - case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; - - case 'V': - fprintf(stderr, "PCRE version %s\n", pcre_version()); - break; - - default: - fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]); - return usage(2); + if (*s == 'f') + { + pattern_filename = s + 1; + if (pattern_filename[0] == 0) + { + if (i >= argc - 1) + { + fprintf(stderr, "pcregrep: File name missing after -f\n"); + exit(usage(2)); + } + pattern_filename = argv[++i]; + } + break; + } + else options = handle_option(*s++, options); } } } -/* There must be at least a regexp argument */ +pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); +hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); -if (i >= argc) return usage(0); +if (pattern_list == NULL || hints_list == NULL) + { + fprintf(stderr, "pcregrep: malloc failed\n"); + return 2; + } -/* Compile the regular expression. */ +/* Compile the regular expression(s). */ -pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL); -if (pattern == NULL) +if (pattern_filename != NULL) { - fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error); - return 2; + FILE *f = fopen(pattern_filename, "r"); + char buffer[BUFSIZ]; + if (f == NULL) + { + fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, + strerror(errno)); + return 2; + } + while (fgets(buffer, sizeof(buffer), f) != NULL) + { + char *s = buffer + (int)strlen(buffer); + if (pattern_count >= MAX_PATTERN_COUNT) + { + fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n", + MAX_PATTERN_COUNT); + return 2; + } + while (s > buffer && isspace((unsigned char)(s[-1]))) s--; + if (s == buffer) continue; + *s = 0; + pattern_list[pattern_count] = pcre_compile(buffer, options, &error, + &errptr, NULL); + if (pattern_list[pattern_count++] == NULL) + { + fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n", + pattern_count, errptr, error); + return 2; + } + } + fclose(f); } -/* Study the regular expression, as we will be running it may times */ +/* If no file name, a single regex must be given inline */ -hints = pcre_study(pattern, 0, &error); -if (error != NULL) +else { - fprintf(stderr, "pcregrep: error while studing regex: %s\n", error); - return 2; + if (i >= argc) return usage(0); + pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL); + if (pattern_list[0] == NULL) + { + fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr, + error); + return 2; + } + pattern_count++; + } + +/* Study the regular expressions, as we will be running them may times */ + +for (j = 0; j < pattern_count; j++) + { + hints_list[j] = pcre_study(pattern_list[j], 0, &error); + if (error != NULL) + { + char s[16]; + if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); + fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); + return 2; + } } /* If there are no further arguments, do the business on stdin and exit */ if (i >= argc) return pcregrep(stdin, NULL); -/* Otherwise, work through the remaining arguments as files. If there is only -one, don't give its name on the output. */ +/* Otherwise, work through the remaining arguments as files or directories. +Pass in the fact that there is only one argument at top level - this suppresses +the file name if the argument is not a directory. */ -if (i == argc - 1) filenames = FALSE; +only_one_at_top = (i == argc - 1); if (filenames_only) filenames = TRUE; for (; i < argc; i++) { - FILE *in = fopen(argv[i], "r"); - if (in == NULL) - { - fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno)); - rc = 2; - } - else - { - int frc = pcregrep(in, filenames? argv[i] : NULL); - if (frc == 0 && rc == 1) rc = 0; - fclose(in); - } + int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top); + if (frc == 0 && rc == 1) rc = 0; } return rc; |