summaryrefslogtreecommitdiff
path: root/src/indexer.c
diff options
context:
space:
mode:
author Vicent Martí <tanoku@gmail.com> 2011-08-03 18:59:30 -0700
committer Vicent Martí <tanoku@gmail.com> 2011-08-03 18:59:30 -0700
commit 31bf5f38575fdffb27326a2b2bae88d096bb0071 (patch)
tree ff0d57473b6ab5f4510e9b8969f9c64e96c232f7 /src/indexer.c
parent 20c1bca1234b7e726ef5826ed329cd466a32e405 (diff)
parent 65cb1586c45b6ca2e74753b93e8677edcae903ae (diff)
download libgit2-31bf5f38575fdffb27326a2b2bae88d096bb0071.tar.gz
Merge pull request #345 from carlosmn/gsoc2011/indexer
Implement a pack indexer
Diffstat (limited to 'src/indexer.c')
-rw-r--r-- src/indexer.c 380
1 files changed, 380 insertions, 0 deletions
diff --git a/src/indexer.c b/src/indexer.c
new file mode 100644
index 000000000..7a2b28ae3
--- /dev/null
+++ b/src/indexer.c
@@ -0,0 +1,380 @@
+/*
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2,
+ * as published by the Free Software Foundation.
+ *
+ * In addition to the permissions in the GNU General Public License,
+ * the authors give you unlimited permission to link the compiled
+ * version of this file into combinations with other programs,
+ * and to distribute those combinations without any restriction
+ * coming from the use of this file. (The General Public License
+ * restrictions do apply in other respects; for example, they cover
+ * modification of the file, and distribution when not linked into
+ * a combined executable.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "git2/indexer.h"
+#include "git2/object.h"
+#include "git2/zlib.h"
+#include "git2/oid.h"
+
+#include "common.h"
+#include "pack.h"
+#include "mwindow.h"
+#include "posix.h"
+#include "pack.h"
+#include "filebuf.h"
+#include "sha1.h"
+
+/* Largest value representable in 31 bits; git_indexer_run() records any pack offset above this in entry.offset_long. */
+#define UINT31_MAX (0x7FFFFFFF)
+
+/* One record per packed object: filled in by git_indexer_run(), written out by git_indexer_write(). */
+struct entry {
+ git_oid oid; /* id of the object; objects_cmp() orders entries by this field */
+ uint32_t crc; /* CRC32 of the packed bytes, stored after htonl() so it can be written as-is */
+ uint32_t offset; /* offset of the entry in the packfile, or UINT32_MAX when offset_long holds it */
+ uint64_t offset_long; /* full offset; only set when the offset is larger than UINT31_MAX */
+};
+
+/* State shared by git_indexer_new(), git_indexer_run(), git_indexer_write() and git_indexer_free(). */
+typedef struct git_indexer {
+ struct git_pack_file *pack; /* packfile being indexed; allocated by git_indexer_new() with the name appended */
+ struct stat st; /* result of p_stat() on the packfile; st_size is used to locate the trailing hash */
+ struct git_pack_header hdr; /* raw header read from the start of the packfile by parse_header() */
+ size_t nr_objects; /* ntohl(hdr.hdr_entries): number of objects git_indexer_run() will process */
+ git_vector objects; /* vector of heap-allocated struct entry pointers, compared with objects_cmp() */
+ git_filebuf file; /* file buffer the index is written through in git_indexer_write() */
+ unsigned int fanout[256]; /* fanout[i] is incremented for every object whose first oid byte is <= i */
+ git_oid hash; /* SHA-1 over the sorted object ids, computed in git_indexer_write() */
+} git_indexer;
+
+/*
+ * Return a pointer to the hash stored inside the indexer. The field is
+ * filled in by git_indexer_write(); the pointer refers to memory owned
+ * by `idx`.
+ */
+const git_oid *git_indexer_hash(git_indexer *idx)
+{
+	const git_oid *stored_hash = &idx->hash;
+
+	return stored_hash;
+}
+
+/*
+ * Read the pack header from the already opened packfile descriptor into
+ * idx->hdr and check that both the signature and the version are ones
+ * we recognize.
+ *
+ * Returns GIT_SUCCESS, the rethrown read error, or GIT_EOBJCORRUPTED
+ * when the signature or version does not match.
+ */
+static int parse_header(git_indexer *idx)
+{
+	struct git_pack_header *hdr = &idx->hdr;
+	int error = p_read(idx->pack->mwf.fd, hdr, sizeof(*hdr));
+
+	if (error < GIT_SUCCESS)
+		return git__rethrow(error, "Failed to read in pack header");
+
+	/* The header is kept exactly as read, so the constant is byte-swapped instead */
+	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE))
+		return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature");
+
+	if (!pack_version_ok(hdr->hdr_version))
+		return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version");
+
+	return GIT_SUCCESS;
+}
+
+/*
+ * Comparison callback for the objects vector: orders two struct entry
+ * records by their object id, using git_oid_cmp().
+ */
+int objects_cmp(const void *a, const void *b)
+{
+	const git_oid *lhs = &((const struct entry *)a)->oid;
+	const git_oid *rhs = &((const struct entry *)b)->oid;
+
+	return git_oid_cmp(lhs, rhs);
+}
+
+/*
+ * Create an indexer for the packfile at `packname`.
+ *
+ * The path must be absolute. The packfile is stat'ed and opened
+ * read-only, its header is read and validated, and the objects vector
+ * is sized from the object count announced in the header.
+ *
+ * On success `*out` receives the new indexer (release it with
+ * git_indexer_free()) and GIT_SUCCESS is returned. On failure a
+ * negative error code is returned and `*out` is left untouched.
+ */
+int git_indexer_new(git_indexer **out, const char *packname)
+{
+	git_indexer *idx;
+	size_t namelen;
+	int ret, error;
+
+	if (git_path_root(packname) < 0)
+		return git__throw(GIT_EINVALIDPATH, "Path is not absolute");
+
+	idx = git__malloc(sizeof(git_indexer));
+	if (idx == NULL)
+		return GIT_ENOMEM;
+
+	memset(idx, 0x0, sizeof(*idx));
+
+	namelen = strlen(packname);
+	idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1);
+	if (idx->pack == NULL) {
+		/*
+		 * git_indexer_free() dereferences idx->pack, so it cannot be
+		 * used for this failure; release the indexer by hand.
+		 */
+		free(idx);
+		return GIT_ENOMEM;
+	}
+
+	memset(idx->pack, 0x0, sizeof(struct git_pack_file));
+	memcpy(idx->pack->pack_name, packname, namelen + 1);
+
+	ret = p_stat(packname, &idx->st);
+	if (ret < 0) {
+		if (errno == ENOENT)
+			error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. File not found");
+		else
+			error = git__throw(GIT_EOSERR, "Failed to stat packfile.");
+
+		goto cleanup_unopened;
+	}
+
+	ret = p_open(idx->pack->pack_name, O_RDONLY);
+	if (ret < 0) {
+		error = git__throw(GIT_EOSERR, "Failed to open packfile");
+		goto cleanup_unopened;
+	}
+
+	idx->pack->mwf.fd = ret;
+	idx->pack->mwf.size = idx->st.st_size;
+
+	error = parse_header(idx);
+	if (error < GIT_SUCCESS) {
+		error = git__rethrow(error, "Failed to parse packfile header");
+		goto cleanup;
+	}
+
+	idx->nr_objects = ntohl(idx->hdr.hdr_entries);
+
+	error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	*out = idx;
+
+	return GIT_SUCCESS;
+
+cleanup:
+	/* The packfile descriptor is open here, so the regular teardown applies */
+	git_indexer_free(idx);
+
+	return error;
+
+cleanup_unopened:
+	/*
+	 * No descriptor has been opened yet: mwf.fd still holds the 0 left
+	 * by the memset, and git_indexer_free() would p_close() it. Free
+	 * only what was allocated.
+	 */
+	free(idx->pack);
+	free(idx);
+
+	return error;
+}
+
+/*
+ * Rewrite the last component of `path` in place so that it reads
+ * "pack-<hex of idx->hash>.idx". Everything up to and including the
+ * last '/' is kept. The caller provides a buffer large enough for the
+ * new name.
+ */
+static void index_path(char *path, git_indexer *idx)
+{
+	/* The explicit "\0" makes the suffix copy below carry a terminator along */
+	static const char prefix[] = "pack-";
+	static const char suffix[] = ".idx\0";
+	char *cursor = strrchr(path, '/') + 1;
+
+	memcpy(cursor, prefix, STRLEN(prefix));
+	cursor += STRLEN(prefix);
+
+	git_oid_fmt(cursor, &idx->hash);
+
+	memcpy(cursor + GIT_OID_HEXSZ, suffix, STRLEN(suffix));
+}
+
+/*
+ * Write the index for the pack processed by git_indexer_run().
+ *
+ * The entries are sorted by object id and written in this order: index
+ * header (version 2), fanout table, object ids, CRC32 values, 32-bit
+ * offsets, 64-bit offsets for entries flagged with UINT32_MAX, the
+ * hash found in the last GIT_OID_RAWSZ bytes of the packfile, and
+ * finally the hash of the index contents themselves.
+ *
+ * The file is first opened under the packfile's name with the trailing
+ * "pack" replaced by "idx", and is committed as "pack-<hash>.idx" in
+ * the same directory, where <hash> is the SHA-1 of the sorted object
+ * ids (also stored in idx->hash).
+ *
+ * Returns GIT_SUCCESS or a negative error code.
+ */
+int git_indexer_write(git_indexer *idx)
+{
+	git_mwindow *w = NULL;
+	int error;
+	size_t namelen;
+	/* long_offsets counts the 64-bit slots handed out so far; it must start at 0 */
+	unsigned int i, long_offsets = 0, left;
+	struct git_pack_idx_header hdr;
+	char filename[GIT_PATH_MAX];
+	char *slash;
+	struct entry *entry;
+	void *packfile_hash;
+	git_oid file_hash;
+	SHA_CTX ctx;
+
+	git_vector_sort(&idx->objects);
+
+	/*
+	 * Build "<name minus 'pack'>idx" in the local buffer. The name has
+	 * to be at least as long as the suffix being replaced and must fit
+	 * in the buffer ("idx\0" is one byte shorter than "pack" plus its
+	 * terminator, so namelen bytes are enough).
+	 */
+	namelen = strlen(idx->pack->pack_name);
+	if (namelen < STRLEN("pack") || namelen > sizeof(filename))
+		return git__throw(GIT_EINVALIDPATH, "Invalid packfile name length");
+
+	memcpy(filename, idx->pack->pack_name, namelen);
+	memcpy(filename + namelen - STRLEN("pack"), "idx\0", STRLEN("idx\0"));
+
+	/*
+	 * index_path() later overwrites everything after the last '/' with
+	 * "pack-", GIT_OID_HEXSZ hex digits and ".idx\0"; make sure that
+	 * name fits before any file is created.
+	 */
+	slash = strrchr(filename, '/');
+	if (slash == NULL ||
+	    (size_t)(slash + 1 - filename) + STRLEN("pack-") + GIT_OID_HEXSZ + STRLEN(".idx\0") > sizeof(filename))
+		return git__throw(GIT_EINVALIDPATH, "Index file name does not fit in the path buffer");
+
+	error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS);
+	if (error < GIT_SUCCESS) {
+		/* The buffer was never opened, so there is nothing to hand to the cleanup path */
+		return git__rethrow(error, "Failed to open index file for writing");
+	}
+
+	/* Write out the header */
+	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
+	hdr.idx_version = htonl(2);
+	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	/* Write out the fanout table */
+	for (i = 0; i < 256; ++i) {
+		uint32_t n = htonl(idx->fanout[i]);
+		error = git_filebuf_write(&idx->file, &n, sizeof(n));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the object names (SHA-1 hashes) and hash them for the file name */
+	SHA1_Init(&ctx);
+	git_vector_foreach(&idx->objects, i, entry) {
+		error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
+		SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+	SHA1_Final(idx->hash.id, &ctx);
+
+	/* Write out the CRC32 values (already in network byte order) */
+	git_vector_foreach(&idx->objects, i, entry) {
+		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the offsets */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t n;
+
+		/* UINT32_MAX flags a 64-bit offset: store its slot number with the top bit set */
+		if (entry->offset == UINT32_MAX)
+			n = htonl(0x80000000 | long_offsets++);
+		else
+			n = htonl(entry->offset);
+
+		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the long offsets, in the same order their slots were assigned */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t split[2];
+
+		if (entry->offset != UINT32_MAX)
+			continue;
+
+		split[0] = htonl(entry->offset_long >> 32);
+		split[1] = htonl(entry->offset_long & 0xffffffff);
+
+		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
+		if (error < GIT_SUCCESS)
+			goto cleanup;
+	}
+
+	/* Write out the packfile trailer: the last GIT_OID_RAWSZ bytes of the pack */
+	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+	if (packfile_hash == NULL) {
+		error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
+		goto cleanup;
+	}
+
+	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
+
+	git_mwindow_close(&w);
+
+	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	/* Write out the index sha */
+	error = git_filebuf_hash(&file_hash, &idx->file);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	/* Figure out what the final name should be */
+	index_path(filename, idx);
+	/* Commit file */
+	error = git_filebuf_commit_at(&idx->file, filename);
+
+cleanup:
+	if (error < GIT_SUCCESS)
+		git_filebuf_cleanup(&idx->file);
+
+	return error;
+}
+
+/*
+ * Walk every object in the packfile and collect what the index needs.
+ *
+ * For each of the idx->nr_objects objects, starting right after the
+ * pack header, the object is unpacked and hashed to obtain its id, the
+ * CRC32 of its packed bytes is computed, and a struct entry with id,
+ * CRC and offset is added to idx->objects. The fanout table is updated
+ * for every slot at or after the first byte of the id.
+ *
+ * `stats->total` is set up front and `stats->processed` is updated
+ * after each object.
+ *
+ * Returns GIT_SUCCESS or a negative error code. All windows on the
+ * packfile are released before returning, on success and on failure.
+ */
+int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
+{
+	git_mwindow_file *mwf;
+	off_t off = sizeof(struct git_pack_header);
+	int error;
+	struct entry *entry = NULL;	/* owned here until stored in idx->objects */
+	git_rawobj obj;
+	unsigned int left, processed;
+
+	assert(idx && stats);
+
+	mwf = &idx->pack->mwf;
+	error = git_mwindow_file_register(mwf);
+	if (error < GIT_SUCCESS)
+		return git__rethrow(error, "Failed to register mwindow file");
+
+	stats->total = idx->nr_objects;
+	stats->processed = processed = 0;
+
+	/* Keeps the free() in the cleanup path harmless when nothing is unpacked */
+	obj.data = NULL;
+
+	while (processed < idx->nr_objects) {
+		git_oid oid;
+		git_mwindow *w = NULL;
+		char hdr[512] = {0}; /* FIXME: How long should this be? */
+		int i, hdr_len;
+		off_t entry_start = off;
+		void *packed;
+		size_t entry_size;
+
+		entry = git__malloc(sizeof(struct entry));
+		if (entry == NULL) {
+			error = GIT_ENOMEM;
+			goto cleanup;
+		}
+		memset(entry, 0x0, sizeof(struct entry));
+
+		/* Offsets that do not fit in 31 bits are flagged and kept in full */
+		if (off > UINT31_MAX) {
+			entry->offset = UINT32_MAX;
+			entry->offset_long = off;
+		} else {
+			entry->offset = off;
+		}
+
+		error = git_packfile_unpack(&obj, idx->pack, &off);
+		if (error < GIT_SUCCESS) {
+			/*
+			 * What a failed unpack leaves in obj is not visible from
+			 * this file, so do not attempt to free it.
+			 */
+			obj.data = NULL;
+			error = git__rethrow(error, "Failed to unpack object");
+			goto cleanup;
+		}
+
+		error = git_odb__hash_obj(&oid, hdr, sizeof(hdr), &hdr_len, &obj);
+		if (error < GIT_SUCCESS) {
+			error = git__rethrow(error, "Failed to hash object");
+			goto cleanup;
+		}
+
+		git_oid_cpy(&entry->oid, &oid);
+		entry->crc = crc32(0L, Z_NULL, 0);
+
+		/* `off` was advanced by the unpack, so this is the packed size of the entry */
+		entry_size = off - entry_start;
+		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
+		if (packed == NULL) {
+			/* `error` still holds a success code here; report a real failure */
+			error = git__rethrow(GIT_ENOMEM, "Failed to open window to read packed data");
+			goto cleanup;
+		}
+		entry->crc = htonl(crc32(entry->crc, packed, entry_size));
+		git_mwindow_close(&w);
+
+		/* Add the object to the list */
+		error = git_vector_insert(&idx->objects, entry);
+		if (error < GIT_SUCCESS) {
+			error = git__rethrow(error, "Failed to add entry to list");
+			goto cleanup;
+		}
+		/* The vector holds the entry now; git_indexer_free() releases it */
+		entry = NULL;
+
+		for (i = oid.id[0]; i < 256; ++i) {
+			idx->fanout[i]++;
+		}
+
+		free(obj.data);
+		obj.data = NULL;
+
+		stats->processed = ++processed;
+	}
+
+cleanup:
+	/* Both are NULL on the success path; on failure they hold what was in flight */
+	free(obj.data);
+	free(entry);
+	git_mwindow_free_all(mwf);
+
+	return error;
+
+}
+
+/*
+ * Release an indexer: close the packfile descriptor, free every entry
+ * collected in the objects vector, the vector itself, the pack
+ * structure and the indexer.
+ *
+ * A NULL `idx` is ignored, and an indexer whose pack structure was
+ * never allocated is handled, so the function can be used on a
+ * partially constructed indexer.
+ */
+void git_indexer_free(git_indexer *idx)
+{
+	unsigned int i;
+	struct entry *e;
+
+	if (idx == NULL)
+		return;
+
+	/* idx->pack stays NULL when its allocation failed in git_indexer_new() */
+	if (idx->pack != NULL) {
+		p_close(idx->pack->mwf.fd);
+		free(idx->pack);
+	}
+
+	git_vector_foreach(&idx->objects, i, e)
+		free(e);
+	git_vector_free(&idx->objects);
+
+	free(idx);
+}
+