summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-02-28 16:49:21 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-02-28 16:49:21 +0000
commit d2d6efa3e301f1003e89eed13825a3261892a3f7 (patch)
tree ba6bf32c3cdc32823a8b9999f0d8550b07e791ad
parent 5853ad42b1b6b27e14b3dafdb2e9f69655ce756d (diff)
download pcre-d2d6efa3e301f1003e89eed13825a3261892a3f7.tar.gz
Added --file-list to pcregrep.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@944 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 4
-rwxr-xr-x RunGrepTest 8
-rw-r--r-- doc/pcregrep.1 35
-rw-r--r-- pcregrep.c 55
-rw-r--r-- testdata/grepfilelist 3
-rw-r--r-- testdata/grepoutput 9
6 files changed, 92 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 697d525..2f9d466 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -65,7 +65,9 @@ Version 8.31
16. (*COMMIT) control verb is now supported by the JIT compiler.
-6. The Unicode data tables have been updated to Unicode 6.1.0.
+17. The Unicode data tables have been updated to Unicode 6.1.0.
+
+18. Added --file-list option to pcregrep.
Version 8.30 04-February-2012
diff --git a/RunGrepTest b/RunGrepTest
index fecc06e..4fa5ed3 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -407,6 +407,14 @@ echo "---------------------------- Test 83 -----------------------------" >>test
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
echo "RC=$?" >>testtry
+echo "---------------------------- Test 84 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist "fox|complete") >>testtry 2>&1
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 85 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index 6ee4cc6..055b7c8 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -224,11 +224,21 @@ each line of input. A data line is output if any of the patterns match it. The
filename can be given as "-" to refer to the standard input. When \fB-f\fP is
used, patterns specified on the command line using \fB-e\fP may also be
present; they are tested before the file's patterns. However, no other pattern
-is taken from the command line; all arguments are treated as file names. There
-is an overall maximum of 100 patterns. Trailing white space is removed from
-each line, and blank lines are ignored. An empty file contains no patterns and
-therefore matches nothing. See also the comments about multiple patterns versus
-a single pattern with alternatives in the description of \fB-e\fP above.
+is taken from the command line; all arguments are treated as the names of paths
+to be searched. There is an overall maximum of 100 patterns. Trailing white
+space is removed from each line, and blank lines are ignored. An empty file
+contains no patterns and therefore matches nothing. See also the comments about
+multiple patterns versus a single pattern with alternatives in the description
+of \fB-e\fP above.
+.TP
+\fB--file-list\fP=\fIfilename\fP
+Read a list of files to be searched from the given file, one per line. Trailing
+white space is removed from each line, and blank lines are ignored. These files
+are searched before any others that may be listed on the command line. The
+filename can be given as "-" to refer to the standard input. If \fB--file\fP
+and \fB--file-list\fP are both specified as "-", patterns are read first. This
+is useful only when the standard input is a terminal, from which further lines
+(the list of files) can be read after an end-of-file indication.
.TP
\fB--file-offsets\fP
Instead of showing lines or parts of lines that match, show each match as an
@@ -485,11 +495,12 @@ convert this to an appropriate sequence if the output is sent to a file.
Many of the short and long forms of \fBpcregrep\fP's options are the same
as in the GNU \fBgrep\fP program (version 2.5.4). Any long option of the form
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
-(PCRE terminology). However, the \fB--file-offsets\fP, \fB--include-dir\fP,
-\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
-\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--recursion-limit\fP,
-\fB-u\fP, and \fB--utf-8\fP options are specific to \fBpcregrep\fP, as is the
-use of the \fB--only-matching\fP option with a capturing parentheses number.
+(PCRE terminology). However, the \fB--file-list\fP, \fB--file-offsets\fP,
+\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP,
+\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP,
+\fB--recursion-limit\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
+\fBpcregrep\fP, as is the use of the \fB--only-matching\fP option with a
+capturing parentheses number.
.P
Although most of the common options work the same way, a few are different in
\fBpcregrep\fP. For example, the \fB--include\fP option's argument is a glob
@@ -577,6 +588,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 06 September 2011
-Copyright (c) 1997-2011 University of Cambridge.
+Last updated: 28 February 2012
+Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/pcregrep.c b/pcregrep.c
index 02cfc1f..7149425 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -147,6 +147,7 @@ static int pattern_count = 0;
static pcre **pattern_list = NULL;
static pcre_extra **hints_list = NULL;
+static char *file_list = NULL;
static char *include_pattern = NULL;
static char *exclude_pattern = NULL;
static char *include_dir_pattern = NULL;
@@ -225,6 +226,7 @@ used to identify them. */
#define N_M_LIMIT_REC (-14)
#define N_BUFSIZE (-15)
#define N_NOJIT (-16)
+#define N_FILE_LIST (-17)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", " terminate options" },
@@ -241,6 +243,7 @@ static option_item optionlist[] = {
{ OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
{ OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
+ { OP_STRING, N_FILE_LIST, &file_list, "file-list=path","read files to search from file" },
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
@@ -1851,8 +1854,8 @@ for (op = optionlist; op->one_char != 0; op++)
printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
-printf("When reading patterns from a file instead of using a command line option,\n");
-printf("trailing white space is removed and blank lines are ignored.\n");
+printf("When reading patterns or file names from a file, trailing white\n");
+printf("space is removed and blank lines are ignored.\n");
printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
MAX_PATTERN_COUNT, PATBUFSIZE);
@@ -2694,22 +2697,56 @@ if (include_dir_pattern != NULL)
goto EXIT2;
}
}
+
+/* If a file that contains a list of files to search has been specified, read
+it line by line and search the given files. Otherwise, if there are no further
+arguments, do the business on stdin and exit. */
-/* If there are no further arguments, do the business on stdin and exit. */
+if (file_list != NULL)
+ {
+ char buffer[PATBUFSIZE];
+ FILE *fl;
+ if (strcmp(file_list, "-") == 0) fl = stdin; else
+ {
+ fl = fopen(file_list, "rb");
+ if (fl == NULL)
+ {
+ fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
+ strerror(errno));
+ goto EXIT2;
+ }
+ }
+ while (fgets(buffer, PATBUFSIZE, fl) != NULL)
+ {
+ int frc;
+ char *end = buffer + (int)strlen(buffer);
+ while (end > buffer && isspace(end[-1])) end--;
+ *end = 0;
+ if (*buffer != 0)
+ {
+ frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
+ if (frc > 1) rc = frc;
+ else if (frc == 0 && rc == 1) rc = 0;
+ }
+ }
+ if (fl != stdin) fclose (fl);
+ }
+
+/* Do this only if there was no file list (and no file arguments). */
-if (i >= argc)
+else if (i >= argc)
{
rc = pcregrep(stdin, FR_PLAIN, stdin_name,
(filenames > FN_DEFAULT)? stdin_name : NULL);
goto EXIT;
}
-/* Otherwise, work through the remaining arguments as files or directories.
-Pass in the fact that there is only one argument at top level - this suppresses
-the file name if the argument is not a directory and filenames are not
-otherwise forced. */
+/* After handling file-list or if there are remaining arguments, work through
+them as files or directories. Pass in the fact that there is only one argument
+at top level - this suppresses the file name if the argument is not a directory
+and filenames are not otherwise forced. */
-only_one_at_top = i == argc - 1; /* Catch initial value of i */
+only_one_at_top = i == argc - 1 && file_list == NULL;
for (; i < argc; i++)
{
diff --git a/testdata/grepfilelist b/testdata/grepfilelist
new file mode 100644
index 0000000..dd73ec7
--- /dev/null
+++ b/testdata/grepfilelist
@@ -0,0 +1,3 @@
+testdata/grepinputv
+
+testdata/grepinputx
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 9b3fb29..74f7359 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -638,3 +638,12 @@ RC=0
pcregrep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
pcregrep: check the --buffer-size option
RC=2
+---------------------------- Test 84 -----------------------------
+testdata/grepinputv:fox jumps
+testdata/grepinputx:complete pair
+testdata/grepinputx:That was a complete pair
+testdata/grepinputx:complete pair
+RC=0
+---------------------------- Test 85 -----------------------------
+./testdata/grepinput3:Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+RC=0