summaryrefslogtreecommitdiff
path: root/src/object.c
diff options
context:
space:
mode:
authorRussell Belfer <rb@github.com>2014-05-12 14:38:39 -0700
committerRussell Belfer <rb@github.com>2014-05-15 14:11:19 -0700
commit575f107704255254f52d197240d55f2030af0454 (patch)
treeaf5149788297f3e9ec17992b342a3005eab36e31 /src/object.c
parent88b1b36dfcc6b406f2b6f21e0e042071984b3b90 (diff)
downloadlibgit2-rb/object-parse-flexibility.tar.gz
Add lax parsing for commit and tag objectsrb/object-parse-flexibility
This changes the behavior of object parsing for commits and tags so that even when bad data is found inside the object, we will continue to try to parse as much of the object as we can. The existing functions (`git_object_lookup` for example) will still delete the partially parsed object before returning an error, but this also adds a new function `git_object_lookup_lax` that will still return the error, but will also return the object with the partial data (if we got far enough along in the parsing process to even create the base object).
Diffstat (limited to 'src/object.c')
-rw-r--r--src/object.c380
1 files changed, 291 insertions, 89 deletions
diff --git a/src/object.c b/src/object.c
index 93068b85f..2e0f0e5d4 100644
--- a/src/object.c
+++ b/src/object.c
@@ -13,6 +13,7 @@
#include "tree.h"
#include "blob.h"
#include "tag.h"
+#include "signature.h"
static const int OBJECT_BASE_SIZE = 4096;
@@ -48,26 +49,36 @@ static git_object_def git_objects_table[] = {
{ "REF_DELTA", 0, NULL, NULL },
};
+static int git_object__match_cache(git_otype type, git_otype cached)
+{
+ if (type == GIT_OBJ_ANY || type == cached)
+ return 0;
+
+ giterr_set(
+ GITERR_INVALID,
+ "Requested object type (%s) does not match type in ODB (%s)",
+ git_object_type2string(type), git_object_type2string(cached));
+ return GIT_ENOTFOUND;
+}
+
int git_object__from_odb_object(
- git_object **object_out,
+ git_object **out,
git_repository *repo,
git_odb_object *odb_obj,
- git_otype type)
+ git_otype type,
+ bool lax)
{
int error;
size_t object_size;
git_object_def *def;
git_object *object = NULL;
- assert(object_out);
- *object_out = NULL;
+ assert(out);
+ *out = NULL;
/* Validate type match */
- if (type != GIT_OBJ_ANY && type != odb_obj->cached.type) {
- giterr_set(GITERR_INVALID,
- "The requested type does not match the type in the ODB");
- return GIT_ENOTFOUND;
- }
+ if ((error = git_object__match_cache(type, odb_obj->cached.type)) < 0)
+ return error;
if ((object_size = git_object__size(odb_obj->cached.type)) == 0) {
giterr_set(GITERR_INVALID, "The requested type is invalid");
@@ -87,10 +98,14 @@ int git_object__from_odb_object(
def = &git_objects_table[odb_obj->cached.type];
assert(def->free && def->parse);
- if ((error = def->parse(object, odb_obj)) < 0)
- def->free(object);
- else
- *object_out = git_cache_store_parsed(&repo->objects, object);
+ if ((error = def->parse(object, odb_obj)) < 0) {
+ if (lax) /* do not put invalid objects into cache */
+ *out = object;
+ else
+ def->free(object);
+ } else {
+ *out = git_cache_store_parsed(&repo->objects, object);
+ }
return error;
}
@@ -106,27 +121,33 @@ void git_object__free(void *obj)
git_objects_table[type].free(obj);
}
-int git_object_lookup_prefix(
- git_object **object_out,
+static int object_lookup(
+ git_object **out,
git_repository *repo,
const git_oid *id,
size_t len,
- git_otype type)
+ git_otype type,
+ bool lax)
{
- git_object *object = NULL;
+ int error = 0;
git_odb *odb = NULL;
git_odb_object *odb_obj = NULL;
- int error = 0;
- assert(repo && object_out && id);
+ assert(repo && out && id);
if (len < GIT_OID_MINPREFIXLEN) {
- giterr_set(GITERR_OBJECT, "Ambiguous lookup - OID prefix is too short");
+ giterr_set(GITERR_OBJECT,
+ "Ambiguous lookup - OID prefix is too short (%d)", (int)len);
return GIT_EAMBIGUOUS;
}
- error = git_repository_odb__weakptr(&odb, repo);
- if (error < 0)
+ if (type != GIT_OBJ_ANY && !git_object__size(type)) {
+ giterr_set(
+ GITERR_INVALID, "The requested type (%d) is invalid", (int)type);
+ return GIT_ENOTFOUND;
+ }
+
+ if ((error = git_repository_odb__weakptr(&odb, repo)) < 0)
return error;
if (len > GIT_OID_HEXSZ)
@@ -135,77 +156,88 @@ int git_object_lookup_prefix(
if (len == GIT_OID_HEXSZ) {
git_cached_obj *cached = NULL;
- /* We want to match the full id : we can first look up in the cache,
- * since there is no need to check for non ambiguousity
- */
+ /* Full id: first look in cache, since there is no ambiguity */
cached = git_cache_get_any(&repo->objects, id);
- if (cached != NULL) {
- if (cached->flags == GIT_CACHE_STORE_PARSED) {
- object = (git_object *)cached;
-
- if (type != GIT_OBJ_ANY && type != object->cached.type) {
- git_object_free(object);
- giterr_set(GITERR_INVALID,
- "The requested type does not match the type in ODB");
- return GIT_ENOTFOUND;
- }
-
- *object_out = object;
- return 0;
- } else if (cached->flags == GIT_CACHE_STORE_RAW) {
- odb_obj = (git_odb_object *)cached;
- } else {
- assert(!"Wrong caching type in the global object cache");
- }
- } else {
- /* Object was not found in the cache, let's explore the backends.
- * We could just use git_odb_read_unique_short_oid,
- * it is the same cost for packed and loose object backends,
- * but it may be much more costly for sqlite and hiredis.
- */
+
+ if (!cached)
+ /* Object not found in cache, so search backends */
error = git_odb_read(&odb_obj, odb, id);
+ else if (cached->flags == GIT_CACHE_STORE_PARSED) {
+ if ((error = git_object__match_cache(type, cached->type)) < 0)
+ git_object_free((git_object *)cached);
+ else
+ *out = (git_object *)cached;
+ return error;
}
+ else if (cached->flags == GIT_CACHE_STORE_RAW)
+ odb_obj = (git_odb_object *)cached;
+ else
+ assert(!"Wrong caching type in the global object cache");
} else {
- git_oid short_oid;
+ git_oid short_oid = {{0}};
- /* We copy the first len*4 bits from id and fill the remaining with 0s */
+ /* Copy first len*4 bits from id and fill the remaining with 0s */
memcpy(short_oid.id, id->id, (len + 1) / 2);
if (len % 2)
short_oid.id[len / 2] &= 0xF0;
- memset(short_oid.id + (len + 1) / 2, 0, (GIT_OID_HEXSZ - len) / 2);
-
- /* If len < GIT_OID_HEXSZ (a strict short oid was given), we have
- * 2 options :
- * - We always search in the cache first. If we find that short oid is
- * ambiguous, we can stop. But in all the other cases, we must then
- * explore all the backends (to find an object if there was match,
- * or to check that oid is not ambiguous if we have found 1 match in
- * the cache)
- * - We never explore the cache, go right to exploring the backends
- * We chose the latter : we explore directly the backends.
+
+ /* If len < GIT_OID_HEXSZ (short oid), we have 2 options:
+ *
+ * - We always search in the cache first. If we find that short
+ * oid is ambiguous, we can stop. But in all the other cases, we
+ * must then explore all the backends (to find an object if
+ * there was match, or to check that oid is not ambiguous if we
+ * have found 1 match in the cache)
+ *
+ * - We never explore the cache, go right to exploring the
+ * backends We chose the latter : we explore directly the
+ * backends.
*/
error = git_odb_read_prefix(&odb_obj, odb, &short_oid, len);
}
- if (error < 0)
- return error;
-
- error = git_object__from_odb_object(object_out, repo, odb_obj, type);
+ if (!error) {
+ error = git_object__from_odb_object(out, repo, odb_obj, type, lax);
- git_odb_object_free(odb_obj);
+ git_odb_object_free(odb_obj);
+ }
return error;
}
-int git_object_lookup(git_object **object_out, git_repository *repo, const git_oid *id, git_otype type) {
- return git_object_lookup_prefix(object_out, repo, id, GIT_OID_HEXSZ, type);
+int git_object_lookup(
+ git_object **out,
+ git_repository *repo,
+ const git_oid *id,
+ git_otype type)
+{
+ return object_lookup(out, repo, id, GIT_OID_HEXSZ, type, false);
+}
+
+int git_object_lookup_prefix(
+ git_object **out,
+ git_repository *repo,
+ const git_oid *id,
+ size_t len,
+ git_otype type)
+{
+ return object_lookup(out, repo, id, len, type, false);
+}
+
+int git_object_lookup_lax(
+ git_object **out,
+ git_repository *repo,
+ const git_oid *id,
+ size_t len,
+ git_otype type)
+{
+ return object_lookup(out, repo, id, len, type, true);
}
void git_object_free(git_object *object)
{
if (object == NULL)
return;
-
git_cached_obj_decref(object);
}
@@ -235,16 +267,21 @@ const char *git_object_type2string(git_otype type)
return git_objects_table[type].str;
}
-git_otype git_object_string2type(const char *str)
+git_otype git_object_string2type(const char *str, size_t len)
{
size_t i;
if (!str || !*str)
return GIT_OBJ_BAD;
+ if (!len)
+ len = strlen(str);
+
+ for (i = 0; i < ARRAY_SIZE(git_objects_table); i++) {
+ size_t typelen = strlen(git_objects_table[i].str);
- for (i = 0; i < ARRAY_SIZE(git_objects_table); i++)
- if (!strcmp(str, git_objects_table[i].str))
+ if (len >= typelen && !memcmp(str, git_objects_table[i].str, len))
return (git_otype)i;
+ }
return GIT_OBJ_BAD;
}
@@ -364,28 +401,25 @@ int git_object_dup(git_object **dest, git_object *source)
}
int git_object_lookup_bypath(
- git_object **out,
- const git_object *treeish,
- const char *path,
- git_otype type)
+ git_object **out,
+ const git_object *treeish,
+ const char *path,
+ git_otype type)
{
- int error = -1;
- git_tree *tree = NULL;
+ int error = 0;
+ git_object *tree = NULL;
git_tree_entry *entry = NULL;
assert(out && treeish && path);
- if ((error = git_object_peel((git_object**)&tree, treeish, GIT_OBJ_TREE)) < 0 ||
- (error = git_tree_entry_bypath(&entry, tree, path)) < 0)
- {
+ if ((error = git_object_peel(&tree, treeish, GIT_OBJ_TREE)) < 0 ||
+ (error = git_tree_entry_bypath(&entry, (git_tree *)tree, path)) < 0)
goto cleanup;
- }
- if (type != GIT_OBJ_ANY && git_tree_entry_type(entry) != type)
- {
- giterr_set(GITERR_OBJECT,
- "object at path '%s' is not of the asked-for type %d",
- path, type);
+ if (type != GIT_OBJ_ANY && git_tree_entry_type(entry) != type) {
+ giterr_set(
+ GITERR_OBJECT, "object at path '%s' is not a %s (%d)",
+ path, git_object_type2string(type), type);
error = GIT_EINVALIDSPEC;
goto cleanup;
}
@@ -394,7 +428,8 @@ int git_object_lookup_bypath(
cleanup:
git_tree_entry_free(entry);
- git_tree_free(tree);
+ git_object_free(tree);
+
return error;
}
@@ -440,3 +475,170 @@ int git_object_short_id(git_buf *out, const git_object *obj)
return error;
}
+static int object_parse_error(
+ git_otype otype, git_object_parse_t *item, const char *msg)
+{
+ const char *typestr = git_object_type2string(otype);
+
+ if (item->tag)
+ giterr_set(GITERR_OBJECT, "Failed to parse %s - %s '%s'",
+ typestr, msg, item->tag);
+ else
+ giterr_set(GITERR_OBJECT, "Failed to parse %s - %s", typestr, msg);
+
+ return -1;
+}
+
+static int object_parse_line(
+ git_otype otype,
+ git_object_parse_t *item,
+ const char *buf,
+ const char *eol,
+ int error)
+{
+ size_t len;
+ const char *msg = NULL;
+
+ buf += item->taglen + 1;
+
+ if (eol <= buf) {
+ msg = "insufficient data for";
+ goto done;
+ } else
+ len = (size_t)(eol - buf);
+
+ switch (item->type) {
+ case GIT_PARSE_OID:
+ case GIT_PARSE_OID_ARRAY: {
+ git_oid *id = (item->type == GIT_PARSE_OID) ?
+ item->value.id : git_array_alloc(*item->value.ids);
+
+ if (!id)
+ msg = "out of memory";
+ else if (len < GIT_OID_HEXSZ)
+ msg = "insufficient data for";
+ else if (git_oid_fromstr(id, buf) < 0)
+ msg = "invalid OID in";
+ else if (len > GIT_OID_HEXSZ + 1)
+ msg = "extra data after";
+ else if (buf[GIT_OID_HEXSZ] != '\n')
+ msg = "improper termination for";
+ break;
+ }
+ case GIT_PARSE_OTYPE:
+ if ((*item->value.otype = git_object_string2type(buf, len)) ==
+ GIT_OBJ_BAD)
+ msg = "invalid value for";
+ break;
+ case GIT_PARSE_SIGNATURE:
+ *item->value.sig = git__calloc(1, sizeof(git_signature));
+ if (!*item->value.sig)
+ msg = "out of memory";
+ else if (git_signature__parse(
+ *item->value.sig, &buf, eol + 1, NULL, '\n') < 0)
+ msg = "invalid signature for";
+ break;
+ case GIT_PARSE_TO_EOL:
+ if (eol[-1] == '\r')
+ --len;
+ if ((*item->value.text = git__strndup(buf, len)) == NULL)
+ msg = "out of memory";
+ break;
+ default:
+ msg = "unexpected parse type";
+ break;
+ }
+
+done:
+ if (msg && !error)
+ error = object_parse_error(otype, item, msg);
+ return error;
+}
+
+int git_object__parse_lines(
+ git_otype otype,
+ git_object_parse_t *parse,
+ const char *buf,
+ const char *buf_end)
+{
+ int error = 0;
+ bool optional = false;
+ char *eol;
+ git_object_parse_t *scan = parse, *next = parse + 1;
+ size_t len;
+
+ /* process required and optional lines */
+ for (; buf < buf_end && scan->type > GIT_PARSE_BODY; scan = (next++)) {
+ len = buf_end - buf;
+
+ if (scan->type == GIT_PARSE_MODE_OPTIONAL) {
+ optional = true;
+ continue;
+ }
+
+ if (git__iseol(buf, buf_end - buf))
+ goto body;
+
+ if ((eol = memchr(buf, '\n', buf_end - buf)) == NULL) {
+ if (!error)
+ error = object_parse_error(otype, scan, "unterminated line");
+ break;
+ }
+ len = (size_t)(eol - buf);
+
+ if (len > scan->taglen &&
+ !memcmp(scan->tag, buf, scan->taglen) &&
+ buf[scan->taglen] == ' ')
+ {
+ error = object_parse_line(otype, scan, buf, eol, error);
+
+ if (scan->type == GIT_PARSE_OID_ARRAY) /* don't advance yet */
+ next = scan;
+ }
+ else if (optional)
+ /* for now, skip this tag - eventually search tags? */
+ next = scan;
+ else if (scan->type == GIT_PARSE_OID_ARRAY)
+ continue;
+ else if (!error)
+ error = object_parse_error(
+ otype, scan, "missing required field");
+
+ buf = eol + 1; /* advance to next line */
+ }
+
+body:
+
+ if (scan->type > GIT_PARSE_BODY) {
+ if (!optional && !error)
+ error = object_parse_error
+ (otype, scan, "missing required field");
+
+ while (scan->type > GIT_PARSE_BODY)
+ scan++;
+ }
+
+ if (scan->type > GIT_PARSE_BODY)
+ return error;
+
+ while (buf < buf_end && !git__iseol(buf, buf_end - buf)) {
+ if ((eol = memchr(buf, '\n', buf_end - buf)) == NULL)
+ buf = buf_end;
+ else
+ buf = eol + 1;
+ }
+
+ if (buf < buf_end)
+ buf += (*buf == '\n') ? 1 : 2;
+ else {
+ buf = buf_end;
+
+ if (!error && scan->type != GIT_PARSE_BODY_OPTIONAL)
+ error = object_parse_error(otype, scan, "missing message body");
+ }
+
+ if (scan->value.body)
+ *scan->value.body = buf;
+
+ return error;
+}