summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/blob.c 156
-rw-r--r-- src/buffer.c 2
-rw-r--r-- src/filter.c 132
-rw-r--r-- src/filter.h 40
4 files changed, 281 insertions, 49 deletions
diff --git a/src/blob.c b/src/blob.c
index 4065ffa12..57a31041e 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -11,6 +11,7 @@
#include "common.h"
#include "blob.h"
+#include "filter.h"
const void *git_blob_rawcontent(git_blob *blob)
{
@@ -65,15 +66,101 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b
return GIT_SUCCESS;
}
+/*
+ * Stream the file at `path` into the ODB as a blob and store its id in `oid`.
+ *
+ * The ODB write stream is opened for exactly `file_size` bytes, so the file
+ * is read in chunks that never exceed the number of bytes still expected.
+ *
+ * Returns the result of finalizing the stream on success. Returns an error
+ * code if the stream or the file cannot be opened, if a read fails, if the
+ * file ends before `file_size` bytes were read, or if the stream rejects a
+ * chunk.
+ */
+static int write_file_stream(git_oid *oid, git_odb *odb, const char *path, git_off_t file_size)
+{
+	int fd = -1, error;
+	char buffer[4096];
+	git_odb_stream *stream = NULL;
+
+	if ((error = git_odb_open_wstream(&stream, odb, file_size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
+		return error;
+
+	if ((fd = p_open(path, O_RDONLY)) < 0) {
+		error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", path);
+		goto cleanup;
+	}
+
+	while (file_size > 0) {
+		size_t want = sizeof(buffer);
+		ssize_t read_len;
+
+		/* Never hand the stream more bytes than it was opened for. */
+		if ((git_off_t)want > file_size)
+			want = (size_t)file_size;
+
+		read_len = p_read(fd, buffer, want);
+
+		/*
+		 * A zero-length read means the file is shorter than `file_size`
+		 * (e.g. it was truncated after being stat'ed). Treating it as an
+		 * error avoids spinning forever waiting for bytes that never come.
+		 */
+		if (read_len <= 0) {
+			error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
+			goto cleanup;
+		}
+
+		if ((error = stream->write(stream, buffer, (size_t)read_len)) < GIT_SUCCESS)
+			goto cleanup;
+
+		file_size -= read_len;
+	}
+
+	error = stream->finalize_write(oid, stream);
+
+cleanup:
+	if (fd >= 0)
+		p_close(fd);
+
+	stream->free(stream);
+	return error;
+}
+
+/*
+ * Read the whole file at `path` into memory, run it through `filters`, and
+ * write the filtered result to the ODB as a blob, storing its id in `oid`.
+ *
+ * Returns the first error reported by reading the file, applying the
+ * filters, or writing to the ODB; otherwise the result of the ODB write.
+ * Both temporary buffers are released on every path.
+ */
+static int write_file_filtered(
+	git_oid *oid,
+	git_odb *odb,
+	const char *path,
+	git_vector *filters)
+{
+	int error;
+	git_buf file_in = GIT_BUF_INIT;
+	git_buf filter_result = GIT_BUF_INIT;
+
+	error = git_futils_readbuffer(&file_in, path);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	error = git_filter__apply(&filter_result, &file_in, filters, path);
+	if (error < GIT_SUCCESS)
+		goto cleanup;
+
+	/* Propagate the write result: a failed ODB write must not look like success. */
+	error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB);
+
+cleanup:
+	git_buf_free(&file_in);
+	git_buf_free(&filter_result);
+
+	return error;
+}
+
+/*
+ * Store the target of the symlink at `path` as a blob in the ODB.
+ *
+ * `link_size` is the expected length of the link target; reading any other
+ * number of bytes is reported as an error. On success the id of the new
+ * blob is written to `oid` and the result of the ODB write is returned.
+ */
+static int write_symlink(git_oid *oid, git_odb *odb, const char *path, size_t link_size)
+{
+	int error;
+	ssize_t nread;
+	char *target = git__malloc(link_size);
+
+	if (target == NULL)
+		return GIT_ENOMEM;
+
+	nread = p_readlink(path, target, link_size);
+
+	if (nread == (ssize_t)link_size)
+		error = git_odb_write(oid, odb, (void *)target, link_size, GIT_OBJ_BLOB);
+	else
+		error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
+
+	/* The temporary buffer is released regardless of the outcome. */
+	free(target);
+	return error;
+}
+
int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *path)
{
int error = GIT_SUCCESS;
git_buf full_path = GIT_BUF_INIT;
git_off_t size;
- git_odb_stream *stream = NULL;
struct stat st;
const char *workdir;
- git_odb *odb;
+ git_odb *odb = NULL;
workdir = git_repository_workdir(repo);
if (workdir == NULL)
@@ -95,63 +182,36 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
if (error < GIT_SUCCESS)
goto cleanup;
- if ((error = git_odb_open_wstream(&stream, odb, (size_t)size, GIT_OBJ_BLOB)) < GIT_SUCCESS)
- goto cleanup;
-
if (S_ISLNK(st.st_mode)) {
- char *link_data;
- ssize_t read_len;
-
- link_data = git__malloc((size_t)size);
- if (!link_data) {
- error = GIT_ENOMEM;
- goto cleanup;
- }
-
- read_len = p_readlink(full_path.ptr, link_data, (size_t)size);
-
- if (read_len != (ssize_t)size) {
- error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink");
- free(link_data);
- goto cleanup;
- }
-
- stream->write(stream, link_data, (size_t)size);
- free(link_data);
-
+ error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
} else {
- int fd;
- char buffer[2048];
+ git_vector write_filters = GIT_VECTOR_INIT;
- if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) {
- error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr);
+ if ((error = git_filter__load_for_file(
+ &write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
goto cleanup;
- }
-
- while (size > 0) {
- ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
- if (read_len < 0) {
- error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read full file");
- p_close(fd);
- goto cleanup;
- }
-
- stream->write(stream, buffer, read_len);
- size -= read_len;
+ if (write_filters.length == 0) {
+ error = write_file_stream(oid, odb, full_path.ptr, size);
+ } else {
+ error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
}
- p_close(fd);
+ /*
+ * TODO: eventually support streaming filtered files, for files which are bigger
+ * than a given threshold. This is not a priority because applying a filter in
+ * streaming mode changes the final size of the blob, and without knowing its
+ * final size, the blob cannot be written in stream mode to the ODB.
+ *
+ * The plan is to do streaming writes to a tempfile on disk and then stream
+ * that file to the ODB, using `write_file_stream`.
+ *
+ * CAREFULLY DESIGNED APIS YO
+ */
}
- error = stream->finalize_write(oid, stream);
-
cleanup:
- if (stream)
- stream->free(stream);
-
git_buf_free(&full_path);
-
return error;
}
diff --git a/src/buffer.c b/src/buffer.c
index b9f62cc30..e86246f94 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -235,7 +235,7 @@ char *git_buf_detach(git_buf *buf)
{
char *data = buf->ptr;
- if (buf->asize <= 0)
+ if (buf->asize == 0 || buf->ptr == &git_buf__oom)
return NULL;
git_buf_init(buf, 0);
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 000000000..b97ac6697
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "fileops.h"
+#include "hash.h"
+#include "filter.h"
+
+/*
+ * Classify every byte of `text` and record the totals in `stats`
+ * (ported from core Git).
+ *
+ * `stats` is zeroed first. CR and LF are counted on their own, and a CR
+ * immediately followed by LF additionally counts as one CRLF. BS, HT, ESC,
+ * FF and every byte >= 32 other than DEL count as printable; DEL and the
+ * remaining control bytes count as non-printable, with NUL also tracked
+ * separately.
+ */
+void git_text__stat(git_text_stats *stats, git_buf *text)
+{
+	size_t pos;
+
+	memset(stats, 0, sizeof(*stats));
+
+	for (pos = 0; pos < text->size; pos++) {
+		unsigned char byte = text->ptr[pos];
+
+		switch (byte) {
+		case '\r':
+			stats->cr++;
+			if (pos + 1 < text->size && text->ptr[pos + 1] == '\n')
+				stats->crlf++;
+			break;
+
+		case '\n':
+			stats->lf++;
+			break;
+
+		/* BS, HT, ESC and FF are harmless control characters */
+		case '\b': case '\t': case '\033': case '\014':
+			stats->printable++;
+			break;
+
+		case 0:
+			stats->nul++;
+			stats->nonprintable++;
+			break;
+
+		/* DEL */
+		case 127:
+			stats->nonprintable++;
+			break;
+
+		default:
+			if (byte < 32)
+				stats->nonprintable++;
+			else
+				stats->printable++;
+			break;
+		}
+	}
+
+	/* A trailing EOF marker (^Z) should not count against the text. */
+	if (text->size >= 1 && text->ptr[text->size - 1] == '\032')
+		stats->nonprintable--;
+}
+
+/*
+ * Decide from previously gathered `stats` whether the content looks binary
+ * (heuristic ported from core Git).
+ *
+ * Returns 1 if the content holds any NUL byte, or if the non-printable
+ * count exceeds the printable count divided by 128; returns 0 otherwise.
+ *
+ * The CR, LF and CRLF counts are deliberately not consulted: a low ratio of
+ * CRLF to LF can be normal (e.g. an editor that only adds CRLF to new
+ * lines), and CR-only files are probably not worth supporting.
+ */
+int git_text__is_binary(git_text_stats *stats)
+{
+	unsigned int tolerated = stats->printable >> 7;
+
+	return (stats->nul != 0 || tolerated < stats->nonprintable) ? 1 : 0;
+}
+
+int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode)
+{
+ /*
+  * Placeholder: no filters are loaded yet. None of the arguments is
+  * used, `filters` is left untouched, and the function always returns 0.
+  */
+ return 0;
+}
+
+/*
+ * Run the contents of `source` through every callback in `filters`, in
+ * order, and leave the final result in `dest`.
+ *
+ * `source` and `dest` are used as a pair of ping-pong buffers, so the
+ * contents of `source` are NOT preserved. `filename` is passed through to
+ * each filter callback unchanged.
+ *
+ * Returns GIT_SUCCESS, or GIT_ENOMEM if a buffer could not be grown.
+ */
+int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename)
+{
+	unsigned int src = 0, i;
+	git_buf *dbuffer[2];
+
+	dbuffer[0] = source;
+	dbuffer[1] = dest;
+
+	/* Pre-grow the destination buffer to more or less the size
+	 * we expect it to have */
+	if (git_buf_grow(dest, source->size) < 0)
+		return GIT_ENOMEM;
+
+	for (i = 0; i < filters->length; ++i) {
+		git_filter_cb filter = git_vector_get(filters, i);
+		unsigned int dst = 1 - src;
+
+		git_buf_clear(dbuffer[dst]);
+
+		/* Apply the filter, from dbuffer[src] to dbuffer[dst];
+		 * if the filter declines (non-zero return), skip to the
+		 * next filter without flipping the buffers, so that the
+		 * text goes through untouched.
+		 */
+		if (filter(dbuffer[dst], dbuffer[src], filename) == 0)
+			src = dst;
+
+		if (git_buf_oom(dbuffer[dst]))
+			return GIT_ENOMEM;
+	}
+
+	/* The current text always lives in dbuffer[src]. Make sure it ends
+	 * up in dbuffer[1] (i.e. `dest`); this also covers an empty filter
+	 * list and a last filter that declined. */
+	if (src != 1)
+		git_buf_swap(dest, source);
+
+	return GIT_SUCCESS;
+}
diff --git a/src/filter.h b/src/filter.h
new file mode 100644
index 000000000..9a8f84972
--- /dev/null
+++ b/src/filter.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_filter_h__
+#define INCLUDE_filter_h__
+
+#include "common.h"
+#include "buffer.h"
+#include "git2/odb.h"
+#include "git2/repository.h"
+/*
+ * Filter callback: transforms `source` into `dest` for the file `filename`.
+ * git_filter__apply treats a return value of 0 as "output is in dest" and
+ * any other value as "filter skipped, keep using the source text".
+ */
+typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename);
+/* Direction selector passed as `mode` when loading the filters for a file. */
+typedef enum {
+ GIT_FILTER_TO_WORKTREE, /* NOTE(review): presumably ODB -> working tree; not used in the visible code */
+ GIT_FILTER_TO_ODB /* used when a working-tree file is written into the ODB as a blob */
+} git_filter_mode;
+/* Byte-class counters filled in by git_text__stat. */
+typedef struct {
+ /* NUL, CR, LF and CRLF counts; a CR directly followed by LF is
+  * counted once in each of cr, lf and crlf */
+ unsigned int nul, cr, lf, crlf;
+
+ /* These are just approximations! They feed the binary-detection
+  * heuristic in git_text__is_binary. */
+ unsigned int printable, nonprintable;
+} git_text_stats;
+/* Filter loading (currently a stub that loads nothing) and application of a filter list to a buffer */
+extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
+extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename);
+
+/* Gather stats for a piece of text */
+extern void git_text__stat(git_text_stats *stats, git_buf *text);
+
+/* Heuristics on a set of text stats to check whether it's binary
+ * text or not */
+extern int git_text__is_binary(git_text_stats *stats);
+
+#endif