diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2016-12-31 17:40:45 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2016-12-31 17:40:45 +0000 |
commit | 9c63b32be361e637bf9797d66dc5995b5451809c (patch) | |
tree | 42a02a700dea948ed3a5f4e0aba49ca604523e5c | |
parent | 5b4ac7bf2c9fb6428c9609020b1ac2d2f58b1421 (diff) | |
download | pcre2-9c63b32be361e637bf9797d66dc5995b5451809c.tar.gz |
Upgrade pcre2grep for Windows courtesy of Jason Hood.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@640 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | configure.ac | 24 | ||||
-rw-r--r-- | doc/pcre2grep.1 | 39 | ||||
-rw-r--r-- | src/pcre2grep.c | 177 | ||||
-rw-r--r-- | testdata/grepoutput | 30 |
5 files changed, 212 insertions, 80 deletions
@@ -284,6 +284,28 @@ Now such patterns are no longer completely auto-possessified. 45. Minor cosmetic fix to pcre2test: move a variable that is not used under Windows into the "not Windows" code. +46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy +some of the code: + + * normalised the Windows condition by ensuring WIN32 is defined; + * enables the callout feature under Windows; + * adds globbing (Microsoft's implementation expands quoted args), + using a tweaked opendirectory; + * implements the is_*_tty functions for Windows; + * --color=always will write the ANSI sequences to file; + * add sequences 4 (underline works on Win10) and 5 (blink as bright + background, relatively standard on DOS/Win); + * remove the (char *) casts for the now-const strings; + * remove GREP_COLOUR (grep's command line allowed the 'u', but not + the environment), parsing GREP_COLORS instead; + * uses the current colour if not set, rather than black; + * add print_match for the undefined case; + * fixes a typo. + +In addition, colour settings containing anything other than digits and +semicolon are ignored, and the colour controls are no longer output for empty +strings. + Version 10.22 29-July-2016 -------------------------- diff --git a/configure.ac b/configure.ac index fc5cea2..7b62d7f 100644 --- a/configure.ac +++ b/configure.ac @@ -148,16 +148,11 @@ AC_ARG_ENABLE(pcre2grep-jit, [disable JIT support in pcre2grep]), , enable_pcre2grep_jit=yes) -# Handle --disable-pcre2grep-callout (enabled by default) but not supported -# for Windows. -if test "$HAVE_WINDOWS_H" != "1"; then - AC_ARG_ENABLE(pcre2grep-callout, - AS_HELP_STRING([--disable-pcre2grep-callout], - [disable callout script support in pcre2grep]), - , enable_pcre2grep_callout=yes) -else - enable_pcre2grep_callout=no -fi +# Handle --disable-pcre2grep-callout (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) # Handle --enable-rebuild-chartables AC_ARG_ENABLE(rebuild-chartables, @@ -577,19 +572,14 @@ if test "$enable_pcre2grep_jit" = "yes"; then have no effect unless SUPPORT_JIT is also defined.]) fi -# Currently pcre2grep callout string is not supported under Windows. - if test "$enable_pcre2grep_callout" = "yes"; then if test "$HAVE_WINDOWS_H" != "1"; then if test "$HAVE_SYS_WAIT_H" != "1"; then AC_MSG_ERROR([Callout script support needs sys/wait.h.]) fi - AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ - Define to any value to enable callout script support in pcre2grep.]) - else - AC_MSG_WARN([Callout script support is not available for Windows: disabled]) - enable_pcre2grep_callout=no fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) fi if test "$enable_unicode" = "yes"; then diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 index 13d3c41..a1361f8 100644 --- a/doc/pcre2grep.1 +++ b/doc/pcre2grep.1 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "31 October 2016" "PCRE2 10.23" +.TH PCRE2GREP 1 "31 December 2016" "PCRE2 10.23" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -205,13 +205,22 @@ connected to a terminal. More resources are used when colouring is enabled, because \fBpcre2grep\fP has to search for all possible matches in a line, not just one, in order to colour them all. .sp -The colour that is used can be specified by setting the environment variable -PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these are set, -\fBpcre2grep\fP looks for GREP_COLOUR or GREP_COLOR. The value of the variable -should be a string of two numbers, separated by a semicolon. They are copied -directly into the control string for setting colour on a terminal, so it is -your responsibility to ensure that they make sense. If neither of the -environment variables is set, the default is "1;31", which gives red. +The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +.sp +If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. .TP \fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP If an input path is not a regular file or a directory, "action" specifies how @@ -688,12 +697,12 @@ character. Otherwise \fBpcre2grep\fP will assume that it has no data. .SH "CALLING EXTERNAL SCRIPTS" .rs .sp -On non-Windows systems, \fBpcre2grep\fP has, by default, support for calling -external programs or scripts during matching by making use of PCRE2's callout -facility. However, this support can be disabled when \fBpcre2grep\fP is built. -You can find out whether your binary has support for callouts by running it -with the \fB--help\fP option. If the support is not enabled, all callouts in -patterns are ignored by \fBpcre2grep\fP. +\fBpcre2grep\fP has, by default, support for calling external programs or +scripts during matching by making use of PCRE2's callout facility. However, +this support can be disabled when \fBpcre2grep\fP is built. You can find out +whether your binary has support for callouts by running it with the \fB--help\fP +option. If the support is not enabled, all callouts in patterns are ignored by +\fBpcre2grep\fP. .P A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is either a number or a quoted string (see the @@ -784,6 +793,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 31 October 2016 +Last updated: 31 December 2016 Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 5d1654e..19cf6f3 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -58,14 +58,22 @@ POSSIBILITY OF SUCH DAMAGE. #include <sys/types.h> #include <sys/stat.h> -#if defined(_WIN32) || defined(WIN32) +#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) && !defined WIN32 +#define WIN32 +#endif + +#ifdef WIN32 #include <io.h> /* For _setmode() */ #include <fcntl.h> /* For _O_BINARY */ #endif #ifdef SUPPORT_PCRE2GREP_CALLOUT +#ifdef WIN32 +#include <process.h> +#else #include <sys/wait.h> #endif +#endif #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -135,7 +143,7 @@ convert \r\n at the ends of output lines to \r\r\n. However, that means that any messages written to stdout must have \r\n as their line terminator. This is handled by using STDOUT_NL as the newline string. */ -#if defined(_WIN32) || defined(WIN32) +#ifdef WIN32 #define STDOUT_NL "\r\n" #else #define STDOUT_NL "\n" @@ -158,14 +166,14 @@ static const char *jfriedl_prefix = ""; static const char *jfriedl_postfix = ""; #endif -static const char *colour_string = (char *)"1;31"; +static const char *colour_string = "1;31"; static const char *colour_option = NULL; static const char *dee_option = NULL; static const char *DEE_option = NULL; static const char *locale = NULL; static const char *newline_arg = NULL; -static const char *om_separator = (char *)""; -static const char *stdin_name = (char *)"(standard input)"; +static const char *om_separator = ""; +static const char *stdin_name = "(standard input)"; static char *main_buffer = NULL; @@ -180,7 +188,7 @@ static int endlinetype; static int total_count = 0; static int counts_printed = 0; -#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H +#ifdef WIN32 static int dee_action = dee_SKIP; #else static int dee_action = dee_READ; @@ -209,6 +217,9 @@ static PCRE2_SIZE *offsets; static BOOL count_only = FALSE; static BOOL do_colour = FALSE; +#ifdef WIN32 +static BOOL do_ansi = FALSE; +#endif static BOOL file_offsets = FALSE; static BOOL hyphenpending = FALSE; static BOOL invert = FALSE; @@ -463,6 +474,34 @@ return 0; } +/************************************************* +* Parse GREP_COLORS * +*************************************************/ + +/* Extract ms or mt from GREP_COLORS. + +Argument: the string, possibly NULL +Returns: the value of ms or mt, or NULL if neither present +*/ + +static char * +parse_grep_colors(const char *gc) +{ +static char seq[16]; +char *col; +uint32_t len; +if (gc == NULL) return NULL; +col = strstr(gc, "ms="); +if (col == NULL) col = strstr(gc, "mt="); +if (col == NULL) return NULL; +len = 0; +col += 3; +while (*col != ':' && *col != 0 && len < sizeof(seq)-1) + seq[len++] = *col++; +seq[len] = 0; +return seq; +} + /************************************************* * Exit from the program * @@ -691,6 +730,7 @@ return isatty(fileno(f)); static void print_match(const char* buf, int length) { +if (length == 0) return; if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); FWRITE(buf, 1, length, stdout); if (do_colour) fprintf(stdout, "%c[0m", 0x1b); @@ -704,11 +744,9 @@ if (do_colour) fprintf(stdout, "%c[0m", 0x1b); /* I (Philip Hazel) have no means of testing this code. It was contributed by Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES when it did not exist. David Byron added a patch that moved the #include of -<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. -The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is -undefined when it is indeed undefined. */ +<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. */ -#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H +#elif defined WIN32 #ifndef STRICT # define STRICT @@ -723,6 +761,11 @@ undefined when it is indeed undefined. */ #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF #endif +/* Allow opendirectory to provide globbing, since Microsoft started doing it +wrong (expanding quoted arguments). */ + +#define iswild(name) (strpbrk(name, "*?") != NULL) + typedef struct directory_type { HANDLE handle; @@ -757,7 +800,10 @@ if ((pattern == NULL) || (dir == NULL)) pcre2grep_exit(2); } memcpy(pattern, filename, len); -memcpy(&(pattern[len]), "\\*", 3); +if (iswild(filename)) + pattern[len] = 0; +else + memcpy(&(pattern[len]), "\\*", 3); dir->handle = FindFirstFile(pattern, &(dir->data)); if (dir->handle != INVALID_HANDLE_VALUE) { @@ -815,18 +861,16 @@ return !isdirectory(filename); /************* Test for a terminal in Windows **********/ -/* I don't know how to do this; assume never */ - static BOOL is_stdout_tty(void) { -return FALSE; +return _isatty(_fileno(stdout)); } static BOOL is_file_tty(FILE *f) { -return FALSE; +return _isatty(_fileno(f)); } @@ -839,24 +883,35 @@ static WORD match_colour; static void print_match(const char* buf, int length) { -if (do_colour) SetConsoleTextAttribute(hstdout, match_colour); +if (length == 0) return; +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + else SetConsoleTextAttribute(hstdout, match_colour); + } FWRITE(buf, 1, length, stdout); -if (do_colour) SetConsoleTextAttribute(hstdout, csbi.wAttributes); +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[00m", 0x1b); + else SetConsoleTextAttribute(hstdout, csbi.wAttributes); + } } /* Convert ANSI BGR format to RGB used by Windows */ #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0)) static WORD -decode_ANSI_colour(char *cs) +decode_ANSI_colour(const char *cs) { -WORD result = 0; +WORD result = csbi.wAttributes; while (*cs) { if (isdigit(*cs)) { int code = atoi(cs); if (code == 1) result |= 0x08; + else if (code == 4) result |= 0x8000; + else if (code == 5) result |= 0x80; else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30); else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F); else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4); @@ -880,8 +935,14 @@ init_colour_output() if (do_colour) { hstdout = GetStdHandle(STD_OUTPUT_HANDLE); - GetConsoleScreenBufferInfo(hstdout, &csbi); - + /* This fails when redirected to con; try again if so. */ + if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi) + { + HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL); + GetConsoleScreenBufferInfo(hcon, &csbi); + CloseHandle(hcon); + } match_colour = decode_ANSI_colour(colour_string); /* No valid colour found - turn off colouring */ if (!match_colour) do_colour = FALSE; @@ -927,6 +988,16 @@ is_file_tty(FILE *f) return FALSE; } + +/************* Print optionally coloured match when we can't do it **********/ + +static void +print_match(const char* buf, int length) +{ +if (length == 0) return; +FWRITE(buf, 1, length, stdout); +} + #endif /* End of system-specific functions */ @@ -1637,7 +1708,9 @@ char *args; char *argsptr; char **argsvector; char **argsvectorptr; +#ifndef WIN32 pid_t pid; +#endif int result = 0; (void)unused; /* Avoid compiler warning */ @@ -1825,6 +1898,9 @@ while (length > 0) *argsptr++ = '\0'; *argsvectorptr = NULL; +#ifdef WIN32 +result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector); +#else pid = fork(); if (pid == 0) @@ -1835,6 +1911,7 @@ if (pid == 0) } else if (pid > 0) (void)waitpid(pid, &result, 0); +#endif free(args); free(argsvector); @@ -2635,6 +2712,36 @@ if (isdirectory(pathname)) } } +#ifdef WIN32 +if (iswild(pathname)) + { + char buffer[1024]; + char *nextfile; + char *name; + directory_type *dir = opendirectory(pathname); + + if (dir == NULL) + return 0; + + for (nextfile = name = pathname; *nextfile != 0; nextfile++) + if (*nextfile == '/' || *nextfile == '\\') + name = nextfile + 1; + *name = 0; + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + sprintf(buffer, "%.512s%.128s", pathname, nextfile); + frc = grep_or_recurse(buffer, dir_recurse, FALSE); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } +#endif + #if defined NATIVE_ZOS } #endif @@ -3057,7 +3164,7 @@ change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure that stdout is a binary stream. Note that this means all other output to stdout must use STDOUT_NL to terminate lines. */ -#if defined(_WIN32) || defined(WIN32) +#ifdef WIN32 _setmode( _fileno(stdout), _O_BINARY); #endif @@ -3281,7 +3388,7 @@ for (i = 1; i < argc; i++) switch (op->one_char) { case N_COLOUR: - colour_option = (char *)"auto"; + colour_option = "auto"; break; case 'o': @@ -3446,17 +3553,16 @@ if (locale != NULL) pcre2_set_character_tables(compile_context, character_tables); } -/* Sort out colouring. On non-Windows systems "auto" causes colouring only if -the output is a terminal. On Windows systems "auto" is the same as "always". */ +/* Sort out colouring */ if (colour_option != NULL && strcmp(colour_option, "never") != 0) { - if (strcmp(colour_option, "always") == 0) do_colour = TRUE; -#if defined(_WIN32) || defined(WIN32) - else if (strcmp(colour_option, "auto") == 0) do_colour = TRUE; -#else - else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); + if (strcmp(colour_option, "always") == 0) +#ifdef WIN32 + do_ansi = !is_stdout_tty(), #endif + do_colour = TRUE; + else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); else { fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n", @@ -3467,10 +3573,15 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0) { char *cs = getenv("PCRE2GREP_COLOUR"); if (cs == NULL) cs = getenv("PCRE2GREP_COLOR"); - if (cs == NULL) cs = getenv("GREP_COLOUR"); + if (cs == NULL) cs = getenv("PCREGREP_COLOUR"); + if (cs == NULL) cs = getenv("PCREGREP_COLOR"); + if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS")); if (cs == NULL) cs = getenv("GREP_COLOR"); - if (cs != NULL) colour_string = cs; -#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H + if (cs != NULL) + { + if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs; + } +#ifdef WIN32 init_colour_output(); #endif } diff --git a/testdata/grepoutput b/testdata/grepoutput index a139b44..ee9a9eb 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -725,21 +725,21 @@ RC=0 14: RC=0 ---------------------------- Test 105 ----------------------------- -[1;31m[0mtriple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[0m -[1;31m[0mtriple: t2_txt s1_tag s_txt p_tag p_txt o_tag -[1;31m[0mLorem [1;31mipsum[0m dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -[1;31m[0m -[1;31m[0mtriple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[0m -[1;31m[0mtriple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[0m -[1;31m[0mtriple: t5_txt s1_tag s_txt p_tag p_txt o_tag -[1;31m[0mo_txt -[1;31m[0m -[1;31m[0mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt -[1;31m[0m -[1;31m[0mtriple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem [1;31mipsum[0m dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt RC=0 ---------------------------- Test 106 ----------------------------- a |