summaryrefslogtreecommitdiff
path: root/src/util.c
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2018-10-18 16:08:46 +0200
committerPatrick Steinhardt <ps@pks.im>2018-10-25 12:52:47 +0200
commit83e8a6b36acc67f2702cbbc7d4e334c7f7737719 (patch)
tree20a332ee3db97373acbcc8501b280ec1ade13ce7 /src/util.c
parentf010b66bf693d22560fd1af584d325a8da42b416 (diff)
downloadlibgit2-83e8a6b36acc67f2702cbbc7d4e334c7f7737719.tar.gz
util: provide `git__memmem` function
Unfortunately, neither the `memmem` nor the `strnstr` functions are part of any C standard but are merely extensions of C that are implemented by e.g. glibc. Thus, there is no standardized way to search for a string in a block of memory with a limited size, and using `strstr` is to be considered unsafe in case where the buffer has not been sanitized. In fact, there are some uses of `strstr` in exactly that unsafe way in our codebase. Provide a new function `git__memmem` that implements the `memmem` semantics. That is in a given haystack of `n` bytes, search for the occurrence of a byte sequence of `m` bytes and return a pointer to the first occurrence. The implementation chosen is the "Not So Naive" algorithm from [1]. It was chosen as the implementation is comparably simple while still being reasonably efficient in most cases. Preprocessing happens in constant time and space, searching has a time complexity of O(n*m) with a slightly sub-linear average case. [1]: http://www-igm.univ-mlv.fr/~lecroq/string/
Diffstat (limited to 'src/util.c')
-rw-r--r--src/util.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/src/util.c b/src/util.c
index 79b362f7f..d06df26f9 100644
--- a/src/util.c
+++ b/src/util.c
@@ -355,6 +355,47 @@ size_t git__linenlen(const char *buffer, size_t buffer_len)
return nl ? (size_t)(nl - buffer) + 1 : buffer_len;
}
+/*
+ * Adapted Not So Naive algorithm from http://www-igm.univ-mlv.fr/~lecroq/string/
+ */
+const void * git__memmem(const void *haystack, size_t haystacklen,
+ const void *needle, size_t needlelen)
+{
+ const char *h, *n;
+ size_t j, k, l;
+
+ if (needlelen > haystacklen || !haystacklen || !needlelen)
+ return NULL;
+
+ h = (const char *) haystack,
+ n = (const char *) needle;
+
+ if (needlelen == 1)
+ return memchr(haystack, *n, haystacklen);
+
+ if (n[0] == n[1]) {
+ k = 2;
+ l = 1;
+ } else {
+ k = 1;
+ l = 2;
+ }
+
+ j = 0;
+ while (j <= haystacklen - needlelen) {
+ if (n[1] != h[j + 1]) {
+ j += k;
+ } else {
+ if (memcmp(n + 2, h + j + 2, needlelen - 2) == 0 &&
+ n[0] == h[j])
+ return h + j;
+ j += l;
+ }
+ }
+
+ return NULL;
+}
+
void git__hexdump(const char *buffer, size_t len)
{
static const size_t LINE_WIDTH = 16;