diff options
| author | Edward Thomson <ethomson@edwardthomson.com> | 2019-09-28 17:32:18 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-09-28 17:32:18 +0100 |
| commit | 63307cbad7408b38693e73b74560a063da21f530 (patch) | |
| tree | 91c5020c06715623cf83e631e5e3247d2d7e6d02 | |
| parent | 70325370667370159d5b85690c6dd5db17be3b20 (diff) | |
| parent | f585b129e242bacb4cbecc30a6af727e5b4c2f28 (diff) | |
| download | libgit2-63307cbad7408b38693e73b74560a063da21f530.tar.gz | |
Merge pull request #5226 from pks-t/pks/regexp-api
regexp: implement a new regular expression API
| -rw-r--r-- | cmake/Modules/FindPCRE2.cmake | 5 | ||||
| -rw-r--r-- | src/common.h | 1 | ||||
| -rw-r--r-- | src/config.c | 43 | ||||
| -rw-r--r-- | src/config_file.c | 31 | ||||
| -rw-r--r-- | src/diff_driver.c | 34 | ||||
| -rw-r--r-- | src/errors.c | 15 | ||||
| -rw-r--r-- | src/errors.h | 7 | ||||
| -rw-r--r-- | src/posix_regex.h | 73 | ||||
| -rw-r--r-- | src/regexp.c | 221 | ||||
| -rw-r--r-- | src/regexp.h | 97 | ||||
| -rw-r--r-- | src/revparse.c | 35 | ||||
| -rw-r--r-- | src/userdiff.h | 4 | ||||
| -rw-r--r-- | tests/core/posix.c | 120 | ||||
| -rw-r--r-- | tests/core/regexp.c | 213 |
14 files changed, 599 insertions, 300 deletions
diff --git a/cmake/Modules/FindPCRE2.cmake b/cmake/Modules/FindPCRE2.cmake index 122f0e9d4..f8c5639d5 100644 --- a/cmake/Modules/FindPCRE2.cmake +++ b/cmake/Modules/FindPCRE2.cmake @@ -20,15 +20,14 @@ FIND_PATH(PCRE2_INCLUDE_DIR NAMES pcre2posix.h) # Look for the library. FIND_LIBRARY(PCRE2_LIBRARY NAMES pcre2-8) -FIND_LIBRARY(PCRE2_POSIX_LIBRARY NAMES pcre2-posix) # Handle the QUIETLY and REQUIRED arguments and set PCRE2_FOUND to TRUE if all listed variables are TRUE. INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_POSIX_LIBRARY PCRE2_INCLUDE_DIR) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_INCLUDE_DIR) # Copy the results to the output variables. IF(PCRE2_FOUND) - SET(PCRE2_LIBRARIES ${PCRE2_LIBRARY} ${PCRE2_POSIX_LIBRARY}) + SET(PCRE2_LIBRARIES ${PCRE2_LIBRARY}) SET(PCRE2_INCLUDE_DIRS ${PCRE2_INCLUDE_DIR}) ELSE(PCRE2_FOUND) SET(PCRE2_LIBRARIES) diff --git a/src/common.h b/src/common.h index 8a5761abc..a4152caf2 100644 --- a/src/common.h +++ b/src/common.h @@ -88,7 +88,6 @@ #include "git2/deprecated.h" #include "posix.h" -#include "posix_regex.h" #define DEFAULT_BUFSIZE 65536 #define FILEIO_BUFSIZE DEFAULT_BUFSIZE diff --git a/src/config.c b/src/config.c index d0e439b55..8338da010 100644 --- a/src/config.c +++ b/src/config.c @@ -7,13 +7,15 @@ #include "config.h" -#include "sysdir.h" #include "git2/config.h" #include "git2/sys/config.h" -#include "vector.h" + #include "buf_text.h" #include "config_backend.h" +#include "regexp.h" +#include "sysdir.h" #include "transaction.h" +#include "vector.h" #if GIT_WIN32 # include <windows.h> #endif @@ -345,7 +347,7 @@ typedef struct { git_config_iterator parent; git_config_iterator *current; const git_config *cfg; - p_regex_t regex; + git_regexp regex; size_t i; } all_iter; @@ -423,7 +425,7 @@ static int all_iter_glob_next(git_config_entry **entry, git_config_iterator *_it */ while ((error = all_iter_next(entry, _iter)) == 0) { /* skip 
non-matching keys if regexp was provided */ - if (p_regexec(&iter->regex, (*entry)->name, 0, NULL, 0) != 0) + if (git_regexp_match(&iter->regex, (*entry)->name) != 0) continue; /* and simply return if we like the entry's name */ @@ -447,7 +449,7 @@ static void all_iter_glob_free(git_config_iterator *_iter) { all_iter *iter = (all_iter *) _iter; - p_regfree(&iter->regex); + git_regexp_dispose(&iter->regex); all_iter_free(_iter); } @@ -480,8 +482,7 @@ int git_config_iterator_glob_new(git_config_iterator **out, const git_config *cf iter = git__calloc(1, sizeof(all_iter)); GIT_ERROR_CHECK_ALLOC(iter); - if ((result = p_regcomp(&iter->regex, regexp, P_REG_EXTENDED)) != 0) { - git_error_set_regex(&iter->regex, result); + if ((result = git_regexp_compile(&iter->regex, regexp, 0)) < 0) { git__free(iter); return -1; } @@ -510,18 +511,13 @@ int git_config_backend_foreach_match( { git_config_entry *entry; git_config_iterator* iter; - p_regex_t regex; + git_regexp regex; int error = 0; assert(backend && cb); - if (regexp != NULL) { - if ((error = p_regcomp(&regex, regexp, P_REG_EXTENDED)) != 0) { - git_error_set_regex(&regex, error); - p_regfree(&regex); - return -1; - } - } + if (regexp && git_regexp_compile(&regex, regexp, 0) < 0) + return -1; if ((error = backend->iterator(&iter, backend)) < 0) { iter = NULL; @@ -530,7 +526,7 @@ int git_config_backend_foreach_match( while (!(iter->next(&entry, iter) < 0)) { /* skip non-matching keys if regexp was provided */ - if (regexp && p_regexec(&regex, entry->name, 0, NULL, 0) != 0) + if (regexp && git_regexp_match(&regex, entry->name) != 0) continue; /* abort iterator on non-zero return value */ @@ -541,7 +537,7 @@ int git_config_backend_foreach_match( } if (regexp != NULL) - p_regfree(&regex); + git_regexp_dispose(&regex); iter->free(iter); @@ -981,7 +977,7 @@ typedef struct { git_config_iterator parent; git_config_iterator *iter; char *name; - p_regex_t regex; + git_regexp regex; int have_regex; } multivar_iter; @@ -997,7 +993,7 @@ static int
multivar_iter_next(git_config_entry **entry, git_config_iterator *_it if (!iter->have_regex) return 0; - if (p_regexec(&iter->regex, (*entry)->value, 0, NULL, 0) == 0) + if (git_regexp_match(&iter->regex, (*entry)->value) == 0) return 0; } @@ -1012,7 +1008,7 @@ void multivar_iter_free(git_config_iterator *_iter) git__free(iter->name); if (iter->have_regex) - p_regfree(&iter->regex); + git_regexp_dispose(&iter->regex); git__free(iter); } @@ -1032,13 +1028,8 @@ int git_config_multivar_iterator_new(git_config_iterator **out, const git_config goto on_error; if (regexp != NULL) { - error = p_regcomp(&iter->regex, regexp, P_REG_EXTENDED); - if (error != 0) { - git_error_set_regex(&iter->regex, error); - error = -1; - p_regfree(&iter->regex); + if ((error = git_regexp_compile(&iter->regex, regexp, 0)) < 0) goto on_error; - } iter->have_regex = 1; } diff --git a/src/config_file.c b/src/config_file.c index 849096d0f..bf770c95b 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -18,6 +18,7 @@ #include "config_entries.h" #include "config_parse.h" #include "filebuf.h" +#include "regexp.h" #include "strmap.h" #include "sysdir.h" #include "wildmatch.h" @@ -61,7 +62,7 @@ typedef struct { static int config_read(git_config_entries *entries, const git_repository *repo, config_file *file, git_config_level_t level, int depth); static int config_read_buffer(git_config_entries *entries, const git_repository *repo, config_file *file, git_config_level_t level, int depth, const char *buf, size_t buflen); -static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const p_regex_t *preg, const char *value); +static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const git_regexp *preg, const char *value); static char *escape_value(const char *ptr); /** @@ -350,21 +351,17 @@ static int config_set_multivar( git_config_backend *cfg, const char *name, const char *regexp, const char *value) { config_file_backend *b = 
GIT_CONTAINER_OF(cfg, config_file_backend, parent); - char *key; - p_regex_t preg; + git_regexp preg; int result; + char *key; assert(regexp); if ((result = git_config__normalize_name(name, &key)) < 0) return result; - result = p_regcomp(&preg, regexp, P_REG_EXTENDED); - if (result != 0) { - git_error_set_regex(&preg, result); - result = -1; + if ((result = git_regexp_compile(&preg, regexp, 0)) < 0) goto out; - } /* If we do have it, set call config_write() and reload */ if ((result = config_write(b, name, key, &preg, value)) < 0) @@ -372,7 +369,7 @@ static int config_set_multivar( out: git__free(key); - p_regfree(&preg); + git_regexp_dispose(&preg); return result; } @@ -412,7 +409,7 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con config_file_backend *b = GIT_CONTAINER_OF(cfg, config_file_backend, parent); git_config_entries *entries = NULL; git_config_entry *entry = NULL; - p_regex_t preg = { 0 }; + git_regexp preg = GIT_REGEX_INIT; char *key = NULL; int result; @@ -430,11 +427,8 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con goto out; } - if ((result = p_regcomp(&preg, regexp, P_REG_EXTENDED)) != 0) { - git_error_set_regex(&preg, result); - result = -1; + if ((result = git_regexp_compile(&preg, regexp, 0)) < 0) goto out; - } if ((result = config_write(b, name, key, &preg, NULL)) < 0) goto out; @@ -442,7 +436,7 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con out: git_config_entries_free(entries); git__free(key); - p_regfree(&preg); + git_regexp_dispose(&preg); return result; } @@ -928,7 +922,7 @@ struct write_data { const char *section; const char *orig_name; const char *name; - const p_regex_t *preg; + const git_regexp *preg; const char *value; }; @@ -1033,7 +1027,7 @@ static int write_on_variable( /* If we have a regex to match the value, see if it matches */ if (has_matched && write_data->preg != NULL) - has_matched = (p_regexec(write_data->preg, 
var_value, 0, NULL, 0) == 0); + has_matched = (git_regexp_match(write_data->preg, var_value) == 0); /* If this isn't the name/value we're looking for, simply dump the * existing data back out and continue on. @@ -1094,7 +1088,8 @@ static int write_on_eof( /* * This is pretty much the parsing, except we write out anything we don't have */ -static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const p_regex_t *preg, const char* value) +static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const git_regexp *preg, const char* value) + { char *orig_section = NULL, *section = NULL, *orig_name, *name, *ldot; git_buf buf = GIT_BUF_INIT, contents = GIT_BUF_INIT; diff --git a/src/diff_driver.c b/src/diff_driver.c index 6919e4e61..831d3262d 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -15,6 +15,7 @@ #include "map.h" #include "buf_text.h" #include "config.h" +#include "regexp.h" #include "repository.h" typedef enum { @@ -25,7 +26,7 @@ typedef enum { } git_diff_driver_t; typedef struct { - p_regex_t re; + git_regexp re; int flags; } git_diff_driver_pattern; @@ -39,7 +40,7 @@ struct git_diff_driver { uint32_t binary_flags; uint32_t other_flags; git_array_t(git_diff_driver_pattern) fn_patterns; - p_regex_t word_pattern; + git_regexp word_pattern; char name[GIT_FLEX_ARRAY]; }; @@ -113,7 +114,7 @@ static int diff_driver_add_patterns( if (error < 0) break; - if ((error = p_regcomp(&pat->re, buf.ptr, regex_flags)) != 0) { + if ((error = git_regexp_compile(&pat->re, buf.ptr, regex_flags)) != 0) { /* * TODO: issue a warning */ @@ -130,7 +131,7 @@ static int diff_driver_add_patterns( static int diff_driver_xfuncname(const git_config_entry *entry, void *payload) { - return diff_driver_add_patterns(payload, entry->value, P_REG_EXTENDED); + return diff_driver_add_patterns(payload, entry->value, 0); } static int diff_driver_funcname(const git_config_entry *entry, void *payload) @@ -205,16 +206,12 @@ static 
int git_diff_driver_builtin( if (ddef->fns && (error = diff_driver_add_patterns( - drv, ddef->fns, ddef->flags | P_REG_EXTENDED)) < 0) + drv, ddef->fns, ddef->flags)) < 0) goto done; if (ddef->words && - (error = p_regcomp( - &drv->word_pattern, ddef->words, ddef->flags | P_REG_EXTENDED))) - { - error = git_error_set_regex(&drv->word_pattern, error); + (error = git_regexp_compile(&drv->word_pattern, ddef->words, ddef->flags)) < 0) goto done; - } if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0) goto done; @@ -316,11 +313,10 @@ static int git_diff_driver_load( goto done; if (!ce || !ce->value) /* no diff.<driver>.wordregex, so just continue */; - else if (!(error = p_regcomp(&drv->word_pattern, ce->value, P_REG_EXTENDED))) + else if (!(error = git_regexp_compile(&drv->word_pattern, ce->value, 0))) found_driver = true; else { /* TODO: warn about bad regex instead of failure */ - error = git_error_set_regex(&drv->word_pattern, error); goto done; } @@ -400,10 +396,10 @@ void git_diff_driver_free(git_diff_driver *driver) return; for (i = 0; i < git_array_size(driver->fn_patterns); ++i) - p_regfree(& git_array_get(driver->fn_patterns, i)->re); + git_regexp_dispose(& git_array_get(driver->fn_patterns, i)->re); git_array_clear(driver->fn_patterns); - p_regfree(&driver->word_pattern); + git_regexp_dispose(&driver->word_pattern); git__free(driver); } @@ -451,19 +447,19 @@ static int diff_context_line__pattern_match( git_diff_driver *driver, git_buf *line) { size_t i, maxi = git_array_size(driver->fn_patterns); - p_regmatch_t pmatch[2]; + git_regmatch pmatch[2]; for (i = 0; i < maxi; ++i) { git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, i); - if (!p_regexec(&pat->re, line->ptr, 2, pmatch, 0)) { + if (!git_regexp_search(&pat->re, line->ptr, 2, pmatch)) { if (pat->flags & REG_NEGATE) return false; /* use pmatch data to trim line data */ - i = (pmatch[1].rm_so >= 0) ? 
1 : 0; - git_buf_consume(line, git_buf_cstr(line) + pmatch[i].rm_so); - git_buf_truncate(line, pmatch[i].rm_eo - pmatch[i].rm_so); + i = (pmatch[1].start >= 0) ? 1 : 0; + git_buf_consume(line, git_buf_cstr(line) + pmatch[i].start); + git_buf_truncate(line, pmatch[i].end - pmatch[i].start); git_buf_rtrim(line); return true; diff --git a/src/errors.c b/src/errors.c index 18d6c2dc8..c75f6b17a 100644 --- a/src/errors.c +++ b/src/errors.c @@ -110,21 +110,6 @@ void git_error_set_str(int error_class, const char *string) set_error_from_buffer(error_class); } -int git_error_set_regex(const p_regex_t *regex, int error_code) -{ - char error_buf[1024]; - - assert(error_code); - - p_regerror(error_code, regex, error_buf, sizeof(error_buf)); - git_error_set_str(GIT_ERROR_REGEX, error_buf); - - if (error_code == P_REG_NOMATCH) - return GIT_ENOTFOUND; - - return GIT_EINVALIDSPEC; -} - void git_error_clear(void) { if (GIT_GLOBAL->last_error != NULL) { diff --git a/src/errors.h b/src/errors.h index 86f06f9c7..a2f60f752 100644 --- a/src/errors.h +++ b/src/errors.h @@ -8,7 +8,6 @@ #ifndef INCLUDE_errors_h__ #define INCLUDE_errors_h__ -#include "posix_regex.h" #include "common.h" /* @@ -18,12 +17,6 @@ void git_error_set(int error_class, const char *fmt, ...) GIT_FORMAT_PRINTF(2, 3 void git_error_vset(int error_class, const char *fmt, va_list ap); /** - * Set the error message for a regex failure, using the internal regex - * error code lookup and return a libgit error code. - */ -int git_error_set_regex(const p_regex_t *regex, int error_code); - -/** * Set error message for user callback if needed. * * If the error code in non-zero and no error message is set, this diff --git a/src/posix_regex.h b/src/posix_regex.h deleted file mode 100644 index 421ffeba1..000000000 --- a/src/posix_regex.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) the libgit2 contributors. All rights reserved. 
- * - * This file is part of libgit2, distributed under the GNU GPL v2 with - * a Linking Exception. For full terms see the included COPYING file. - */ -#ifndef INCLUDE_posix_regex_h__ -#define INCLUDE_posix_regex_h__ - -#include "common.h" - -/* - * Regular expressions: if we were asked to use PCRE (either our - * bundled version or a system version) then use their regcomp - * compatible implementation. - */ - -#ifdef GIT_REGEX_BUILTIN - -# include "pcreposix.h" - -# define P_REG_EXTENDED PCRE_REG_EXTENDED -# define P_REG_ICASE PCRE_REG_ICASE -# define P_REG_NOMATCH PCRE_REG_NOMATCH - -# define p_regex_t pcre_regex_t -# define p_regmatch_t pcre_regmatch_t -# define p_regcomp pcre_regcomp -# define p_regerror pcre_regerror -# define p_regexec pcre_regexec -# define p_regfree pcre_regfree - -/* - * Use the system-provided `regex` routines, whether that's via the - * PCRE emulation layer, or libc, preferring `regcomp_l` it's available. - */ - -#else - -# if defined(GIT_REGEX_PCRE2) -# include <pcre2posix.h> -# elif defined(GIT_REGEX_PCRE) -# include <pcreposix.h> -# else -# include <regex.h> -# endif - -# define P_REG_EXTENDED REG_EXTENDED -# define P_REG_ICASE REG_ICASE -# define P_REG_NOMATCH REG_NOMATCH - -# define p_regex_t regex_t -# define p_regmatch_t regmatch_t - -# define p_regerror regerror -# define p_regexec regexec -# define p_regfree regfree - -# ifdef GIT_REGEX_REGCOMP_L -# include <xlocale.h> - -GIT_INLINE(int) p_regcomp(p_regex_t *preg, const char *pattern, int cflags) -{ - return regcomp_l(preg, pattern, cflags, (locale_t) 0); -} - -# else -# define p_regcomp regcomp -# endif /* GIT_REGEX_REGCOMP_L */ - -#endif - -#endif diff --git a/src/regexp.c b/src/regexp.c new file mode 100644 index 000000000..05ed1907c --- /dev/null +++ b/src/regexp.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. 
For full terms see the included COPYING file. + */ + +#include "regexp.h" + +#if defined(GIT_REGEX_BUILTIN) || defined(GIT_REGEX_PCRE) + +int git_regexp_compile(git_regexp *r, const char *pattern, int flags) +{ + int erroffset, cflags = 0; + const char *error; + + if (flags & GIT_REGEXP_ICASE) + cflags |= PCRE_CASELESS; + + if ((*r = pcre_compile(pattern, cflags, &error, &erroffset, NULL)) == NULL) { + git_error_set_str(GIT_ERROR_REGEX, error); + return GIT_EINVALIDSPEC; + } + + return 0; +} + +void git_regexp_dispose(git_regexp *r) +{ + pcre_free(*r); + *r = NULL; +} + +int git_regexp_match(const git_regexp *r, const char *string) +{ + int error; + if ((error = pcre_exec(*r, NULL, string, (int) strlen(string), 0, 0, NULL, 0)) < 0) + return (error == PCRE_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC; + return 0; +} + +int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches) +{ + int static_ovec[9], *ovec; + int error; + size_t i; + + /* The ovec array always needs to be a mutiple of three */ + if (nmatches <= ARRAY_SIZE(static_ovec) / 3) + ovec = static_ovec; + else + ovec = git__calloc(nmatches * 3, sizeof(*ovec)); + GIT_ERROR_CHECK_ALLOC(ovec); + + if ((error = pcre_exec(*r, NULL, string, (int) strlen(string), 0, 0, ovec, (int) nmatches * 3)) < 0) + goto out; + + if (error == 0) + error = (int) nmatches; + + for (i = 0; i < (unsigned int) error; i++) { + matches[i].start = (ovec[i * 2] < 0) ? -1 : ovec[i * 2]; + matches[i].end = (ovec[i * 2 + 1] < 0) ? -1 : ovec[i * 2 + 1]; + } + for (i = (unsigned int) error; i < nmatches; i++) + matches[i].start = matches[i].end = -1; + +out: + if (nmatches > ARRAY_SIZE(static_ovec) / 3) + git__free(ovec); + if (error < 0) + return (error == PCRE_ERROR_NOMATCH) ? 
GIT_ENOTFOUND : GIT_EINVALIDSPEC; + return 0; +} + +#elif defined(GIT_REGEX_PCRE2) + +int git_regexp_compile(git_regexp *r, const char *pattern, int flags) +{ + unsigned char errmsg[1024]; + unsigned long erroff; + int error, cflags = 0; + + if (flags & GIT_REGEXP_ICASE) + cflags |= PCRE2_CASELESS; + + if ((*r = pcre2_compile((const unsigned char *) pattern, PCRE2_ZERO_TERMINATED, + cflags, &error, &erroff, NULL)) == NULL) { + pcre2_get_error_message(error, errmsg, sizeof(errmsg)); + git_error_set_str(GIT_ERROR_REGEX, (char *) errmsg); + return GIT_EINVALIDSPEC; + } + + return 0; +} + +void git_regexp_dispose(git_regexp *r) +{ + pcre2_code_free(*r); + *r = NULL; +} + +int git_regexp_match(const git_regexp *r, const char *string) +{ + pcre2_match_data *data; + int error; + + data = pcre2_match_data_create(1, NULL); + GIT_ERROR_CHECK_ALLOC(data); + + if ((error = pcre2_match(*r, (const unsigned char *) string, strlen(string), + 0, 0, data, NULL)) < 0) + return (error == PCRE2_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC; + + pcre2_match_data_free(data); + return 0; +} + +int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches) +{ + pcre2_match_data *data = NULL; + PCRE2_SIZE *ovec; + int error; + size_t i; + + if ((data = pcre2_match_data_create(nmatches, NULL)) == NULL) { + git_error_set_oom(); + goto out; + } + + if ((error = pcre2_match(*r, (const unsigned char *) string, strlen(string), + 0, 0, data, NULL)) < 0) + goto out; + + if (error == 0 || (unsigned int) error > nmatches) + error = nmatches; + ovec = pcre2_get_ovector_pointer(data); + + for (i = 0; i < (unsigned int) error; i++) { + matches[i].start = (ovec[i * 2] == PCRE2_UNSET) ? -1 : (ssize_t) ovec[i * 2]; + matches[i].end = (ovec[i * 2 + 1] == PCRE2_UNSET) ? 
-1 : (ssize_t) ovec[i * 2 + 1]; + } + for (i = (unsigned int) error; i < nmatches; i++) + matches[i].start = matches[i].end = -1; + +out: + pcre2_match_data_free(data); + if (error < 0) + return (error == PCRE2_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC; + return 0; +} + +#elif defined(GIT_REGEX_REGCOMP) || defined(GIT_REGEX_REGCOMP_L) + +#if defined(GIT_REGEX_REGCOMP_L) +# include <xlocale.h> +#endif + +int git_regexp_compile(git_regexp *r, const char *pattern, int flags) +{ + int cflags = REG_EXTENDED, error; + char errmsg[1024]; + + if (flags & GIT_REGEXP_ICASE) + cflags |= REG_ICASE; + +# if defined(GIT_REGEX_REGCOMP) + if ((error = regcomp(r, pattern, cflags)) != 0) +# else + if ((error = regcomp_l(r, pattern, cflags, (locale_t) 0)) != 0) +# endif + { + regerror(error, r, errmsg, sizeof(errmsg)); + git_error_set_str(GIT_ERROR_REGEX, errmsg); + return GIT_EINVALIDSPEC; + } + + return 0; +} + +void git_regexp_dispose(git_regexp *r) +{ + regfree(r); +} + +int git_regexp_match(const git_regexp *r, const char *string) +{ + int error; + if ((error = regexec(r, string, 0, NULL, 0)) != 0) + return (error == REG_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC; + return 0; +} + +int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches) +{ + regmatch_t static_m[3], *m; + int error; + size_t i; + + if (nmatches <= ARRAY_SIZE(static_m)) + m = static_m; + else + m = git__calloc(nmatches, sizeof(*m)); + + if ((error = regexec(r, string, nmatches, m, 0)) != 0) + goto out; + + for (i = 0; i < nmatches; i++) { + matches[i].start = (m[i].rm_so < 0) ? -1 : m[i].rm_so; + matches[i].end = (m[i].rm_eo < 0) ? -1 : m[i].rm_eo; + } + +out: + if (nmatches > ARRAY_SIZE(static_m)) + git__free(m); + if (error) + return (error == REG_NOMATCH) ? 
GIT_ENOTFOUND : GIT_EINVALIDSPEC; + return 0; +} + +#endif diff --git a/src/regexp.h b/src/regexp.h new file mode 100644 index 000000000..2592ef383 --- /dev/null +++ b/src/regexp.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#ifndef INCLUDE_regexp_h__ +#define INCLUDE_regexp_h__ + +#include "common.h" + +#if defined(GIT_REGEX_BUILTIN) || defined(GIT_REGEX_PCRE) +# include "pcre.h" +typedef pcre *git_regexp; +# define GIT_REGEX_INIT NULL +#elif defined(GIT_REGEX_PCRE2) +# define PCRE2_CODE_UNIT_WIDTH 8 +# include <pcre2.h> +typedef pcre2_code *git_regexp; +# define GIT_REGEX_INIT NULL +#elif defined(GIT_REGEX_REGCOMP) || defined(GIT_REGEX_REGCOMP_L) +# include <regex.h> +typedef regex_t git_regexp; +# define GIT_REGEX_INIT { 0 } +#else +# error "No regex backend" +#endif + +/** Options supported by @git_regexp_compile. */ +typedef enum { + /** Enable case-insensitive matching */ + GIT_REGEXP_ICASE = (1 << 0) +} git_regexp_flags_t; + +/** Structure containing information about regular expression matching groups */ +typedef struct { + /** Start of the given match. -1 if the group didn't match anything */ + ssize_t start; + /** End of the given match. -1 if the group didn't match anything */ + ssize_t end; +} git_regmatch; + +/** + * Compile a regular expression. The compiled expression needs to + * be cleaned up afterwards with `git_regexp_dispose`. + * + * @param r Pointer to the storage where to initialize the regular expression. + * @param pattern The pattern that shall be compiled. + * @param flags Flags to alter how the pattern shall be handled. + * 0 for defaults, otherwise see @git_regexp_flags_t. + * @return 0 on success, otherwise a negative return value. 
+ */ +int git_regexp_compile(git_regexp *r, const char *pattern, int flags); + +/** + * Free memory associated with the regular expression + * + * @param r The regular expression structure to dispose. + */ +void git_regexp_dispose(git_regexp *r); + +/** + * Test whether a given string matches a compiled regular + * expression. + * + * @param r Compiled regular expression. + * @param string String to match against the regular expression. + * @return 0 if the string matches, a negative error code + * otherwise. GIT_ENOTFOUND if no match was found, + * GIT_EINVALIDSPEC if the regular expression matching + * was invalid. + */ +int git_regexp_match(const git_regexp *r, const char *string); + +/** + * Search for matches inside of a given string. + * + * Given a regular expression with capturing groups, this + * function will populate provided @git_regmatch structures with + * offsets for each of the given matches. Non-matching groups + * will have start and end values of the respective @git_regmatch + * structure set to -1. + * + * @param r Compiled regular expression. + * @param string String to match against the regular expression. + * @param nmatches Number of @git_regmatch structures provided by + * the user. + * @param matches Pointer to an array of @git_regmatch structures. + * @return 0 if the string matches, a negative error code + * otherwise. GIT_ENOTFOUND if no match was found, + * GIT_EINVALIDSPEC if the regular expression matching + * was invalid. 
+ */ +int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches); + +#endif diff --git a/src/revparse.c b/src/revparse.c index 4bde0d7f2..2618b38ee 100644 --- a/src/revparse.c +++ b/src/revparse.c @@ -12,6 +12,7 @@ #include "buffer.h" #include "tree.h" #include "refdb.h" +#include "regexp.h" #include "git2.h" @@ -42,7 +43,7 @@ static int maybe_abbrev(git_object** out, git_repository *repo, const char *spec return maybe_sha_or_abbrev(out, repo, spec, speclen); } -static int build_regex(p_regex_t *regex, const char *pattern) +static int build_regex(git_regexp *regex, const char *pattern) { int error; @@ -51,13 +52,11 @@ static int build_regex(p_regex_t *regex, const char *pattern) return GIT_EINVALIDSPEC; } - error = p_regcomp(regex, pattern, P_REG_EXTENDED); + error = git_regexp_compile(regex, pattern, 0); if (!error) return 0; - error = git_error_set_regex(regex, error); - - p_regfree(regex); + git_regexp_dispose(regex); return error; } @@ -66,7 +65,7 @@ static int maybe_describe(git_object**out, git_repository *repo, const char *spe { const char *substr; int error; - p_regex_t regex; + git_regexp regex; substr = strstr(spec, "-g"); @@ -76,8 +75,8 @@ static int maybe_describe(git_object**out, git_repository *repo, const char *spe if (build_regex(&regex, ".+-[0-9]+-g[0-9a-fA-F]+") < 0) return -1; - error = p_regexec(&regex, spec, 0, NULL, 0); - p_regfree(&regex); + error = git_regexp_match(&regex, spec); + git_regexp_dispose(&regex); if (error) return GIT_ENOTFOUND; @@ -143,12 +142,11 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out, { git_reference *ref = NULL; git_reflog *reflog = NULL; - p_regex_t preg; + git_regexp preg; int error = -1; size_t i, numentries, cur; const git_reflog_entry *entry; const char *msg; - p_regmatch_t regexmatches[2]; git_buf buf = GIT_BUF_INIT; cur = position; @@ -168,12 +166,14 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out, numentries = 
git_reflog_entrycount(reflog); for (i = 0; i < numentries; i++) { + git_regmatch regexmatches[2]; + entry = git_reflog_entry_byindex(reflog, i); msg = git_reflog_entry_message(entry); if (!msg) continue; - if (p_regexec(&preg, msg, 2, regexmatches, 0)) + if (git_regexp_search(&preg, msg, 2, regexmatches) < 0) continue; cur--; @@ -181,7 +181,8 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out, if (cur > 0) continue; - git_buf_put(&buf, msg+regexmatches[1].rm_so, regexmatches[1].rm_eo - regexmatches[1].rm_so); + if ((git_buf_put(&buf, msg+regexmatches[1].start, regexmatches[1].end - regexmatches[1].start)) < 0) + goto cleanup; if ((error = git_reference_dwim(base_ref, repo, git_buf_cstr(&buf))) == 0) goto cleanup; @@ -199,7 +200,7 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out, cleanup: git_reference_free(ref); git_buf_dispose(&buf); - p_regfree(&preg); + git_regexp_dispose(&preg); git_reflog_free(reflog); return error; } @@ -448,7 +449,7 @@ cleanup: return error; } -static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex) +static int walk_and_search(git_object **out, git_revwalk *walk, git_regexp *regex) { int error; git_oid oid; @@ -460,7 +461,7 @@ static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex if ((error < 0) && (error != GIT_ENOTFOUND)) return -1; - if (!p_regexec(regex, git_commit_message((git_commit*)obj), 0, NULL, 0)) { + if (!git_regexp_match(regex, git_commit_message((git_commit*)obj))) { *out = obj; return 0; } @@ -476,7 +477,7 @@ static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex static int handle_grep_syntax(git_object **out, git_repository *repo, const git_oid *spec_oid, const char *pattern) { - p_regex_t preg; + git_regexp preg; git_revwalk *walk = NULL; int error; @@ -497,7 +498,7 @@ static int handle_grep_syntax(git_object **out, git_repository *repo, const git_ error = walk_and_search(out, walk, 
&preg); cleanup: - p_regfree(&preg); + git_regexp_dispose(&preg); git_revwalk_free(walk); return error; diff --git a/src/userdiff.h b/src/userdiff.h index 8bde6303f..c9a80d712 100644 --- a/src/userdiff.h +++ b/src/userdiff.h @@ -7,6 +7,8 @@ #ifndef INCLUDE_userdiff_h__ #define INCLUDE_userdiff_h__ +#include "regexp.h" + /* * This file isolates the built in diff driver function name patterns. * Most of these patterns are taken from Git (with permission from the @@ -29,7 +31,7 @@ typedef struct { #define PATTERNS(NAME, FN_PATS, WORD_PAT) \ { NAME, FN_PATS, WORD_PAT WORD_DEFAULT, 0 } #define IPATTERN(NAME, FN_PATS, WORD_PAT) \ - { NAME, FN_PATS, WORD_PAT WORD_DEFAULT, P_REG_ICASE } + { NAME, FN_PATS, WORD_PAT WORD_DEFAULT, GIT_REGEXP_ICASE } /* * The table of diff driver patterns diff --git a/tests/core/posix.c b/tests/core/posix.c index dcc619f22..77ac65ad6 100644 --- a/tests/core/posix.c +++ b/tests/core/posix.c @@ -9,23 +9,12 @@ # endif #endif -#include <locale.h> - #include "clar_libgit2.h" #include "futils.h" #include "posix.h" -#include "userdiff.h" - -#if LC_ALL > 0 -static const char *old_locales[LC_ALL]; -#endif void test_core_posix__initialize(void) { -#if LC_ALL > 0 - memset(&old_locales, 0, sizeof(old_locales)); -#endif - #ifdef GIT_WIN32 /* on win32, the WSA context needs to be initialized * before any socket calls can be performed */ @@ -156,115 +145,6 @@ void test_core_posix__utimes(void) cl_must_pass(p_unlink("foo")); } -static void try_set_locale(int category) -{ -#if LC_ALL > 0 - old_locales[category] = setlocale(category, NULL); -#endif - - if (!setlocale(category, "UTF-8") && - !setlocale(category, "c.utf8") && - !setlocale(category, "en_US.UTF-8")) - cl_skip(); - - if (MB_CUR_MAX == 1) - cl_fail("Expected locale to be switched to multibyte"); -} - -void test_core_posix__p_regcomp_ignores_global_locale_ctype(void) -{ - p_regex_t preg; - - try_set_locale(LC_CTYPE); - - cl_assert(!p_regcomp(&preg, "[\xc0-\xff][\x80-\xbf]", P_REG_EXTENDED)); - - 
p_regfree(&preg); -} - -void test_core_posix__p_regcomp_ignores_global_locale_collate(void) -{ - p_regex_t preg; - -#ifdef GIT_WIN32 - cl_skip(); -#endif - - try_set_locale(LC_COLLATE); - cl_assert(!p_regcomp(&preg, "[\xc0-\xff][\x80-\xbf]", P_REG_EXTENDED)); - - p_regfree(&preg); -} - -void test_core_posix__p_regcomp_matches_digits_with_locale(void) -{ - p_regex_t preg; - char c, str[2]; - -#ifdef GIT_WIN32 - cl_skip(); -#endif - - try_set_locale(LC_COLLATE); - try_set_locale(LC_CTYPE); - - cl_assert(!p_regcomp(&preg, "[[:digit:]]", P_REG_EXTENDED)); - - str[1] = '\0'; - for (c = '0'; c <= '9'; c++) { - str[0] = c; - cl_assert(!p_regexec(&preg, str, 0, NULL, 0)); - } - - p_regfree(&preg); -} - -void test_core_posix__p_regcomp_matches_alphabet_with_locale(void) -{ - p_regex_t preg; - char c, str[2]; - -#ifdef GIT_WIN32 - cl_skip(); -#endif - - try_set_locale(LC_COLLATE); - try_set_locale(LC_CTYPE); - - cl_assert(!p_regcomp(&preg, "[[:alpha:]]", P_REG_EXTENDED)); - - str[1] = '\0'; - for (c = 'a'; c <= 'z'; c++) { - str[0] = c; - cl_assert(!p_regexec(&preg, str, 0, NULL, 0)); - } - for (c = 'A'; c <= 'Z'; c++) { - str[0] = c; - cl_assert(!p_regexec(&preg, str, 0, NULL, 0)); - } - - p_regfree(&preg); -} - -void test_core_posix__p_regcomp_compile_userdiff_regexps(void) -{ - size_t idx; - - for (idx = 0; idx < ARRAY_SIZE(builtin_defs); ++idx) { - git_diff_driver_definition ddef = builtin_defs[idx]; - int error = 0; - p_regex_t preg; - - error = p_regcomp(&preg, ddef.fns, P_REG_EXTENDED | ddef.flags); - p_regfree(&preg); - cl_assert(!error); - - error = p_regcomp(&preg, ddef.words, P_REG_EXTENDED); - p_regfree(&preg); - cl_assert(!error); - } -} - void test_core_posix__unlink_removes_symlink(void) { if (!git_path_supports_symlinks(clar_sandbox_path())) diff --git a/tests/core/regexp.c b/tests/core/regexp.c new file mode 100644 index 000000000..8db5641e5 --- /dev/null +++ b/tests/core/regexp.c @@ -0,0 +1,213 @@ +#include "clar_libgit2.h" + +#include <locale.h> + 
+#include "regexp.h" +#include "userdiff.h" + +#if LC_ALL > 0 +static const char *old_locales[LC_ALL]; +#endif + +static git_regexp regex; + +void test_core_regexp__initialize(void) +{ +#if LC_ALL > 0 + memset(&old_locales, 0, sizeof(old_locales)); +#endif +} + +void test_core_regexp__cleanup(void) +{ + git_regexp_dispose(&regex); +} + +static void try_set_locale(int category) +{ +#if LC_ALL > 0 + old_locales[category] = setlocale(category, NULL); +#endif + + if (!setlocale(category, "UTF-8") && + !setlocale(category, "c.utf8") && + !setlocale(category, "en_US.UTF-8")) + cl_skip(); + + if (MB_CUR_MAX == 1) + cl_fail("Expected locale to be switched to multibyte"); +} + + +void test_core_regexp__compile_ignores_global_locale_ctype(void) +{ + try_set_locale(LC_CTYPE); + cl_git_pass(git_regexp_compile(&regex, "[\xc0-\xff][\x80-\xbf]", 0)); +} + +void test_core_regexp__compile_ignores_global_locale_collate(void) +{ +#ifdef GIT_WIN32 + cl_skip(); +#endif + + try_set_locale(LC_COLLATE); + cl_git_pass(git_regexp_compile(&regex, "[\xc0-\xff][\x80-\xbf]", 0)); +} + +void test_core_regexp__regex_matches_digits_with_locale(void) +{ + char c, str[2]; + +#ifdef GIT_WIN32 + cl_skip(); +#endif + + try_set_locale(LC_COLLATE); + try_set_locale(LC_CTYPE); + + cl_git_pass(git_regexp_compile(&regex, "[[:digit:]]", 0)); + + str[1] = '\0'; + for (c = '0'; c <= '9'; c++) { + str[0] = c; + cl_git_pass(git_regexp_match(&regex, str)); + } +} + +void test_core_regexp__regex_matches_alphabet_with_locale(void) +{ + char c, str[2]; + +#ifdef GIT_WIN32 + cl_skip(); +#endif + + try_set_locale(LC_COLLATE); + try_set_locale(LC_CTYPE); + + cl_git_pass(git_regexp_compile(&regex, "[[:alpha:]]", 0)); + + str[1] = '\0'; + for (c = 'a'; c <= 'z'; c++) { + str[0] = c; + cl_git_pass(git_regexp_match(&regex, str)); + } + for (c = 'A'; c <= 'Z'; c++) { + str[0] = c; + cl_git_pass(git_regexp_match(&regex, str)); + } +} + +void test_core_regexp__compile_userdiff_regexps(void) +{ + size_t idx; + + for (idx = 0; idx < 
ARRAY_SIZE(builtin_defs); ++idx) { + git_diff_driver_definition ddef = builtin_defs[idx]; + + cl_git_pass(git_regexp_compile(&regex, ddef.fns, ddef.flags)); + git_regexp_dispose(&regex); + + cl_git_pass(git_regexp_compile(&regex, ddef.words, 0)); + git_regexp_dispose(&regex); + } +} + +void test_core_regexp__simple_search_matches(void) +{ + cl_git_pass(git_regexp_compile(&regex, "a", 0)); + cl_git_pass(git_regexp_search(&regex, "a", 0, NULL)); +} + +void test_core_regexp__case_insensitive_search_matches(void) +{ + cl_git_pass(git_regexp_compile(&regex, "a", GIT_REGEXP_ICASE)); + cl_git_pass(git_regexp_search(&regex, "A", 0, NULL)); +} + +void test_core_regexp__nonmatching_search_returns_error(void) +{ + cl_git_pass(git_regexp_compile(&regex, "a", 0)); + cl_git_fail(git_regexp_search(&regex, "b", 0, NULL)); +} + +void test_core_regexp__search_finds_complete_match(void) +{ + git_regmatch matches[1]; + + cl_git_pass(git_regexp_compile(&regex, "abc", 0)); + cl_git_pass(git_regexp_search(&regex, "abc", 1, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 3); +} + +void test_core_regexp__search_finds_correct_offsets(void) +{ + git_regmatch matches[3]; + + cl_git_pass(git_regexp_compile(&regex, "(a*)(b*)", 0)); + cl_git_pass(git_regexp_search(&regex, "ab", 3, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 2); + cl_assert_equal_i(matches[1].start, 0); + cl_assert_equal_i(matches[1].end, 1); + cl_assert_equal_i(matches[2].start, 1); + cl_assert_equal_i(matches[2].end, 2); +} + +void test_core_regexp__search_finds_empty_group(void) +{ + git_regmatch matches[3]; + + cl_git_pass(git_regexp_compile(&regex, "(a*)(b*)c", 0)); + cl_git_pass(git_regexp_search(&regex, "ac", 3, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 2); + cl_assert_equal_i(matches[1].start, 0); + cl_assert_equal_i(matches[1].end, 1); + cl_assert_equal_i(matches[2].start, 1); + cl_assert_equal_i(matches[2].end, 1); +} + +void 
test_core_regexp__search_fills_matches_with_first_matching_groups(void) +{ + git_regmatch matches[2]; + + cl_git_pass(git_regexp_compile(&regex, "(a)(b)(c)", 0)); + cl_git_pass(git_regexp_search(&regex, "abc", 2, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 3); + cl_assert_equal_i(matches[1].start, 0); + cl_assert_equal_i(matches[1].end, 1); +} + +void test_core_regexp__search_skips_nonmatching_group(void) +{ + git_regmatch matches[4]; + + cl_git_pass(git_regexp_compile(&regex, "(a)(b)?(c)", 0)); + cl_git_pass(git_regexp_search(&regex, "ac", 4, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 2); + cl_assert_equal_i(matches[1].start, 0); + cl_assert_equal_i(matches[1].end, 1); + cl_assert_equal_i(matches[2].start, -1); + cl_assert_equal_i(matches[2].end, -1); + cl_assert_equal_i(matches[3].start, 1); + cl_assert_equal_i(matches[3].end, 2); +} + +void test_core_regexp__search_initializes_trailing_nonmatching_groups(void) +{ + git_regmatch matches[3]; + + cl_git_pass(git_regexp_compile(&regex, "(a)bc", 0)); + cl_git_pass(git_regexp_search(&regex, "abc", 3, matches)); + cl_assert_equal_i(matches[0].start, 0); + cl_assert_equal_i(matches[0].end, 3); + cl_assert_equal_i(matches[1].start, 0); + cl_assert_equal_i(matches[1].end, 1); + cl_assert_equal_i(matches[2].start, -1); + cl_assert_equal_i(matches[2].end, -1); +} |
