diff options
-rw-r--r-- | git-compat-util.h | 11 | ||||
-rw-r--r-- | refs.c | 234 | ||||
-rwxr-xr-x | t/t1402-check-ref-format.sh | 15 | ||||
-rw-r--r-- | t/valgrind/default.supp | 8 |
4 files changed, 18 insertions, 250 deletions
diff --git a/git-compat-util.h b/git-compat-util.h index 26e92f19cf..f587749b7c 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -731,17 +731,6 @@ void git_qsort(void *base, size_t nmemb, size_t size, #endif #endif -#if defined(__GNUC__) && defined(__x86_64__) -#include <emmintrin.h> -/* - * This is the system memory page size; it's used so that we can read - * outside the bounds of an allocation without segfaulting. - */ -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif -#endif - #ifdef UNRELIABLE_FSTAT #define fstat_is_reliable() 0 #else @@ -7,27 +7,21 @@ /* * How to handle various characters in refnames: - * This table is used by both the SIMD and non-SIMD code. It has - * some cases that are only useful for the SIMD; these are handled - * equivalently to the listed disposition in the non-SIMD code. * 0: An acceptable character for refs - * 1: @, look for a following { to reject @{ in refs (SIMD or = 0) - * 2: \0: End-of-component and string - * 3: /: End-of-component (SIMD or = 2) - * 4: ., look for a preceding . to reject .. in refs - * 5: {, look for a preceding @ to reject @{ in refs - * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7) - * 7: A bad character except * (see check_refname_component below) + * 1: End-of-component + * 2: ., look for a preceding . to reject .. in refs + * 3: {, look for a preceding @ to reject @{ in refs + * 4: A bad character: ASCII control characters, "~", "^", ":" or SP */ static unsigned char refname_disposition[256] = { - 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0, + 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4 }; /* @@ -39,9 +33,8 @@ static unsigned char refname_disposition[256] = { * - any path component of it begins with ".", or * - it has double dots "..", or * - it has ASCII control character, "~", "^", ":" or SP, anywhere, or - * - it has pattern-matching notation "*", "?", "[", anywhere, or - * - it ends with a "/", or - * - it ends with ".lock", or + * - it ends with a "/". + * - it ends with ".lock" * - it contains a "\" (backslash) */ static int check_refname_component(const char *refname, int flags) @@ -53,19 +46,17 @@ static int check_refname_component(const char *refname, int flags) int ch = *cp & 255; unsigned char disp = refname_disposition[ch]; switch (disp) { - case 2: /* fall-through */ - case 3: + case 1: goto out; - case 4: + case 2: if (last == '.') return -1; /* Refname contains "..". */ break; - case 5: + case 3: if (last == '@') return -1; /* Refname contains "@{". */ break; - case 6: /* fall-through */ - case 7: + case 4: return -1; } last = ch; @@ -88,7 +79,7 @@ out: return cp - refname; } -static int check_refname_format_bytewise(const char *refname, int flags) +int check_refname_format(const char *refname, int flags) { int component_len, component_count = 0; @@ -124,195 +115,6 @@ static int check_refname_format_bytewise(const char *refname, int flags) return 0; } -#if defined(__GNUC__) && defined(__x86_64__) -#define SSE_VECTOR_BYTES 16 - -/* Vectorized version of check_refname_format. */ -int check_refname_format(const char *refname, int flags) -{ - const char *cp = refname; - - const __m128i dot = _mm_set1_epi8('.'); - const __m128i at = _mm_set1_epi8('@'); - const __m128i curly = _mm_set1_epi8('{'); - const __m128i slash = _mm_set1_epi8('/'); - const __m128i zero = _mm_set1_epi8('\000'); - const __m128i el = _mm_set1_epi8('l'); - - /* below '*', all characters are forbidden or rare */ - const __m128i star_ub = _mm_set1_epi8('*' + 1); - - const __m128i colon = _mm_set1_epi8(':'); - const __m128i question = _mm_set1_epi8('?'); - - /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */ - const __m128i bracket_lb = _mm_set1_epi8('[' - 1); - const __m128i caret_ub = _mm_set1_epi8('^' + 1); - - /* '~' and above are forbidden */ - const __m128i tilde_lb = _mm_set1_epi8('~' - 1); - - int component_count = 0; - - if (refname[0] == 0 || refname[0] == '/') { - /* entirely empty ref or initial ref component */ - return -1; - } - - /* - * Initial ref component of '.'; below we look for /. so we'll - * miss this. - */ - if (refname[0] == '.') { - if (refname[1] == '/' || refname[1] == '\0') - return -1; - if (!(flags & REFNAME_DOT_COMPONENT)) - return -1; - } - while(1) { - __m128i tmp, tmp1, result; - uint64_t mask; - - if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1) - /* - * End-of-page; fall back to slow method for - * this entire ref. - */ - return check_refname_format_bytewise(refname, flags); - - tmp = _mm_loadu_si128((__m128i *)cp); - tmp1 = _mm_loadu_si128((__m128i *)(cp + 1)); - - /* - * This range (note the lt) contains some - * permissible-but-rare characters (including all - * characters >= 128), which we handle later. It also - * includes \000. - */ - result = _mm_cmplt_epi8(tmp, star_ub); - - result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question)); - result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon)); - - /* This range contains the permissible ] as bycatch */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpgt_epi8(tmp, bracket_lb), - _mm_cmplt_epi8(tmp, caret_ub))); - - result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb)); - - /* .. */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, dot), - _mm_cmpeq_epi8(tmp1, dot))); - /* @{ */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, at), - _mm_cmpeq_epi8(tmp1, curly))); - /* // */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, slash), - _mm_cmpeq_epi8(tmp1, slash))); - /* trailing / */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, slash), - _mm_cmpeq_epi8(tmp1, zero))); - /* .l, beginning of .lock */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, dot), - _mm_cmpeq_epi8(tmp1, el))); - /* - * Even though /. is not necessarily an error, we flag - * it anyway. If we find it, we'll check if it's valid - * and if so we'll advance just past it. - */ - result = _mm_or_si128(result, _mm_and_si128( - _mm_cmpeq_epi8(tmp, slash), - _mm_cmpeq_epi8(tmp1, dot))); - - mask = _mm_movemask_epi8(result); - if (mask) { - /* - * We've found either end-of-string, or some - * probably-bad character or substring. - */ - int i = __builtin_ctz(mask); - switch (refname_disposition[cp[i] & 255]) { - case 0: /* fall-through */ - case 5: - /* - * bycatch: a good character that's in - * one of the ranges of mostly-forbidden - * characters - */ - cp += i + 1; - break; - case 1: - if (cp[i + 1] == '{') - return -1; - cp += i + 1; - break; - case 2: - if (!(flags & REFNAME_ALLOW_ONELEVEL) - && !component_count && !strchr(refname, '/')) - /* Refname has only one component. */ - return -1; - return 0; - case 3: - component_count ++; - /* - * Even if leading dots are allowed, don't - * allow "." as a component (".." is - * prevented by case 4 below). - */ - if (cp[i + 1] == '.') { - if (cp[i + 2] == '\0') - return -1; - if (flags & REFNAME_DOT_COMPONENT) { - /* skip to just after the /. */ - cp += i + 2; - break; - } - return -1; - } else if (cp[i + 1] == '/' || cp[i + 1] == '\0') - return -1; - break; - case 4: - if (cp[i + 1] == '.' || cp[i + 1] == '\0') - return -1; - /* .lock as end-of-component or end-of-string */ - if ((!strncmp(cp + i, ".lock", 5)) - && (cp[i + 5] == '/' || cp[i + 5] == 0)) - return -1; - cp += 1; - break; - case 6: - if (((cp == refname + i) || cp[i - 1] == '/') - && (cp[i + 1] == '/' || cp[i + 1] == 0)) - if (flags & REFNAME_REFSPEC_PATTERN) { - flags &= ~REFNAME_REFSPEC_PATTERN; - /* restart after the * */ - cp += i + 1; - continue; - } - /* fall-through */ - case 7: - return -1; - } - } else - cp += SSE_VECTOR_BYTES; - } -} - -#else - -int check_refname_format (const char *refname, int flags) -{ - return check_refname_format_bytewise(refname, flags); -} - -#endif - struct ref_entry; /* diff --git a/t/t1402-check-ref-format.sh b/t/t1402-check-ref-format.sh index 9aeb352b3d..1a5a5f39fd 100755 --- a/t/t1402-check-ref-format.sh +++ b/t/t1402-check-ref-format.sh @@ -64,7 +64,6 @@ valid_ref "$(printf 'heads/fu\303\237')" invalid_ref 'heads/*foo/bar' --refspec-pattern invalid_ref 'heads/foo*/bar' --refspec-pattern invalid_ref 'heads/f*o/bar' --refspec-pattern -invalid_ref 'heads/foo*//bar' --refspec-pattern ref='foo' invalid_ref "$ref" @@ -129,20 +128,6 @@ valid_ref NOT_MINGW "$ref" '--allow-onelevel --normalize' invalid_ref NOT_MINGW "$ref" '--refspec-pattern --normalize' valid_ref NOT_MINGW "$ref" '--refspec-pattern --allow-onelevel --normalize' - -valid_ref 'refs/heads/a-very-long-refname' -invalid_ref 'refs/heads/.a-very-long-refname' -invalid_ref 'refs/heads/abcdefgh0123..' -invalid_ref 'refs/heads/abcdefgh01234..' -invalid_ref 'refs/heads/abcdefgh012345..' -invalid_ref 'refs/heads/abcdefgh0123456..' -invalid_ref 'refs/heads/abcdefgh01234567..' -valid_ref 'refs/heads/abcdefgh0123.a' -valid_ref 'refs/heads/abcdefgh01234.a' -valid_ref 'refs/heads/abcdefgh012345.a' -valid_ref 'refs/heads/abcdefgh0123456.a' -valid_ref 'refs/heads/abcdefgh01234567.a' - test_expect_success "check-ref-format --branch @{-1}" ' T=$(git write-tree) && sha1=$(echo A | git commit-tree $T) && diff --git a/t/valgrind/default.supp b/t/valgrind/default.supp index 332ab1a3b3..0a6724fcc4 100644 --- a/t/valgrind/default.supp +++ b/t/valgrind/default.supp @@ -49,11 +49,3 @@ Memcheck:Addr4 fun:copy_ref } -{ - ignore-sse-check_refname_format - Memcheck:Addr8 - fun:check_refname_format - fun:cmd_check_ref_format - fun:handle_builtin - fun:main -} |