summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Carlos Martín Nieto <cmn@dwim.me>, 2015-02-19 20:39:20 +0100
committer: Carlos Martín Nieto <cmn@dwim.me>, 2015-02-19 20:39:20 +0100
commit: d15884ce840c7eeac54fc408fdc33e03cd01bc32 (patch)
tree: efa7f56af1f462b99720040b6fd45bc7bf82d68c
parent: 818302c8538b5bad2130ff1e454d9e4054e87c91 (diff)
parent: feb0e022867552c039c02fe39db7b7c3d63ae327 (diff)
download: libgit2-d15884ce840c7eeac54fc408fdc33e03cd01bc32.tar.gz
Merge pull request #2911 from ethomson/streaming_filters
Streaming filters
-rw-r--r-- .travis.yml 1
-rw-r--r-- appveyor.yml 3
-rw-r--r-- include/git2/filter.h 26
-rw-r--r-- include/git2/sys/filter.h 12
-rw-r--r-- include/git2/types.h 9
-rw-r--r-- src/attr.c 5
-rw-r--r-- src/blob.c 14
-rw-r--r-- src/buffer.c 14
-rw-r--r-- src/buffer.h 6
-rw-r--r-- src/checkout.c 136
-rw-r--r-- src/crlf.c 2
-rw-r--r-- src/diff.c 2
-rw-r--r-- src/diff_driver.c 9
-rw-r--r-- src/diff_file.c 2
-rw-r--r-- src/filter.c 418
-rw-r--r-- src/filter.h 13
-rw-r--r-- src/repository.c 2
-rw-r--r-- tests/filter/crlf.c 2
-rw-r--r-- tests/filter/stream.c 221
-rw-r--r-- tests/repo/init.c 2
20 files changed, 728 insertions, 171 deletions
diff --git a/.travis.yml b/.travis.yml
index fc513458b..68b29b1e2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,7 @@ compiler:
env:
global:
- secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs="
+ - GITTEST_INVASIVE_FS_SIZE=1
matrix:
- OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release"
- OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON"
diff --git a/appveyor.yml b/appveyor.yml
index 8ac6728c3..d155485fd 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -3,7 +3,8 @@ branches:
only:
- master
environment:
- GITTEST_INVASIVE_FILESYSTEM: 1
+ GITTEST_INVASIVE_FS_STRUCTURE: 1
+ GITTEST_INVASIVE_FS_SIZE: 1
matrix:
- GENERATOR: "Visual Studio 11"
diff --git a/include/git2/filter.h b/include/git2/filter.h
index 5b3f40394..dc59e6341 100644
--- a/include/git2/filter.h
+++ b/include/git2/filter.h
@@ -39,9 +39,9 @@ typedef enum {
* Filter option flags.
*/
typedef enum {
- GIT_FILTER_OPT_DEFAULT = 0u,
- GIT_FILTER_OPT_ALLOW_UNSAFE = (1u << 0),
-} git_filter_opt_t;
+ GIT_FILTER_DEFAULT = 0u,
+ GIT_FILTER_ALLOW_UNSAFE = (1u << 0),
+} git_filter_flag_t;
/**
* A filter that can transform file data
@@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list;
* @param blob The blob to which the filter will be applied (if known)
* @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT)
- * @param options Combination of `git_filter_opt_t` flags
+ * @param flags Combination of `git_filter_flag_t` flags
* @return 0 on success (which could still return NULL if no filters are
* needed for the requested file), <0 on error
*/
@@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load(
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
- uint32_t options);
+ uint32_t flags);
/**
* Apply filter list to a data buffer.
@@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob(
git_filter_list *filters,
git_blob *blob);
+GIT_EXTERN(int) git_filter_list_stream_data(
+ git_filter_list *filters,
+ git_buf *data,
+ git_writestream *target);
+
+GIT_EXTERN(int) git_filter_list_stream_file(
+ git_filter_list *filters,
+ git_repository *repo,
+ const char *path,
+ git_writestream *target);
+
+GIT_EXTERN(int) git_filter_list_stream_blob(
+ git_filter_list *filters,
+ git_blob *blob,
+ git_writestream *target);
+
/**
* Free a git_filter_list
*
diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h
index 60248271a..5fd8d5566 100644
--- a/include/git2/sys/filter.h
+++ b/include/git2/sys/filter.h
@@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src);
GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src);
/**
- * Get the combination git_filter_opt_t options to be applied
+ * Get the combination git_filter_flag_t options to be applied
*/
-GIT_EXTERN(uint32_t) git_filter_source_options(const git_filter_source *src);
+GIT_EXTERN(uint32_t) git_filter_source_flags(const git_filter_source *src);
/*
* struct git_filter
@@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)(
const git_buf *from,
const git_filter_source *src);
+typedef int (*git_filter_stream_fn)(
+ git_writestream **out,
+ git_filter *self,
+ void **payload,
+ const git_filter_source *src,
+ git_writestream *next);
+
/**
* Callback to clean up after filtering has been applied
*
@@ -247,6 +254,7 @@ struct git_filter {
git_filter_shutdown_fn shutdown;
git_filter_check_fn check;
git_filter_apply_fn apply;
+ git_filter_stream_fn stream;
git_filter_cleanup_fn cleanup;
};
diff --git a/include/git2/types.h b/include/git2/types.h
index 35e1573c7..c90ac4776 100644
--- a/include/git2/types.h
+++ b/include/git2/types.h
@@ -410,6 +410,15 @@ typedef enum {
GIT_SUBMODULE_RECURSE_ONDEMAND = 2,
} git_submodule_recurse_t;
+/** A type to write in a streaming fashion, for example, for filters. */
+typedef struct git_writestream git_writestream;
+
+struct git_writestream {
+ int (*write)(git_writestream *stream, const char *buffer, size_t len);
+ int (*close)(git_writestream *stream);
+ void (*free)(git_writestream *stream);
+};
+
/** @} */
GIT_END_DECL
diff --git a/src/attr.c b/src/attr.c
index 44593da81..38420807a 100644
--- a/src/attr.c
+++ b/src/attr.c
@@ -282,9 +282,8 @@ static int system_attr_file(
* a consumer. This allows them to treat this as a regular `git_buf`,
* but their call to `git_buf_free` will not attempt to free it.
*/
- out->ptr = attr_session->sysdir.ptr;
- out->size = attr_session->sysdir.size;
- out->asize = 0;
+ git_buf_attach_notowned(
+ out, attr_session->sysdir.ptr, attr_session->sysdir.size);
return 0;
}
diff --git a/src/blob.c b/src/blob.c
index 30d5b705b..cf0329064 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -199,7 +199,7 @@ int git_blob__create_from_paths(
/* Load the filters for writing this file to the ODB */
error = git_filter_list_load(
&fl, repo, NULL, hint_path,
- GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
+ GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
/* well, that didn't work */;
@@ -329,15 +329,13 @@ cleanup:
int git_blob_is_binary(const git_blob *blob)
{
- git_buf content;
+ git_buf content = GIT_BUF_INIT;
assert(blob);
- content.ptr = blob->odb_object->buffer;
- content.size =
- min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL);
- content.asize = 0;
-
+ git_buf_attach_notowned(&content, blob->odb_object->buffer,
+ min(blob->odb_object->cached.size,
+ GIT_FILTER_BYTES_TO_CHECK_NUL));
return git_buf_text_is_binary(&content);
}
@@ -359,7 +357,7 @@ int git_blob_filtered_content(
if (!(error = git_filter_list_load(
&fl, git_blob_owner(blob), blob, path,
- GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT))) {
+ GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT))) {
error = git_filter_list_apply_to_blob(out, fl, blob);
diff --git a/src/buffer.c b/src/buffer.c
index 3deb0329c..f633c5e02 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize)
}
}
+void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size)
+{
+ if (git_buf_is_allocated(buf))
+ git_buf_free(buf);
+
+ if (!size) {
+ git_buf_init(buf, 0);
+ } else {
+ buf->ptr = (char *)ptr;
+ buf->asize = 0;
+ buf->size = size;
+ }
+}
+
int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...)
{
va_list ap;
diff --git a/src/buffer.h b/src/buffer.h
index 52342e309..093ed9b60 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
extern char *git_buf_detach(git_buf *buf);
extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
+/* Populates a `git_buf` where the contents are not "owned" by the
+ * buffer, and calls to `git_buf_free` will not free the given buf.
+ */
+extern void git_buf_attach_notowned(
+ git_buf *buf, const char *ptr, size_t size);
+
/**
* Test if there have been any reallocation failures with this git_buf.
*
diff --git a/src/checkout.c b/src/checkout.c
index 880af3dff..f71be26f9 100644
--- a/src/checkout.c
+++ b/src/checkout.c
@@ -17,6 +17,7 @@
#include "git2/diff.h"
#include "git2/submodule.h"
#include "git2/sys/index.h"
+#include "git2/sys/filter.h"
#include "refs.h"
#include "repository.h"
@@ -1371,39 +1372,37 @@ static int mkpath2file(
return error;
}
-static int buffer_to_file(
- checkout_data *data,
- struct stat *st,
- git_buf *buf,
- const char *path,
- mode_t file_mode)
-{
- int error;
-
- if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
- return error;
+struct checkout_stream {
+ git_writestream base;
+ const char *path;
+ int fd;
+ int open;
+};
- if ((error = git_futils_writebuffer(
- buf, path, data->opts.file_open_flags, file_mode)) < 0)
- return error;
+static int checkout_stream_write(
+ git_writestream *s, const char *buffer, size_t len)
+{
+ struct checkout_stream *stream = (struct checkout_stream *)s;
+ int ret;
- if (st) {
- data->perfdata.stat_calls++;
+ if ((ret = p_write(stream->fd, buffer, len)) < 0)
+ giterr_set(GITERR_OS, "Could not write to '%s'", stream->path);
- if ((error = p_stat(path, st)) < 0) {
- giterr_set(GITERR_OS, "Error statting '%s'", path);
- return error;
- }
- }
+ return ret;
+}
- if (GIT_PERMS_IS_EXEC(file_mode)) {
- data->perfdata.chmod_calls++;
+static int checkout_stream_close(git_writestream *s)
+{
+ struct checkout_stream *stream = (struct checkout_stream *)s;
+ assert(stream && stream->open);
- if ((error = p_chmod(path, file_mode)) < 0)
- giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
- }
+ stream->open = 0;
+ return p_close(stream->fd);
+}
- return error;
+static void checkout_stream_free(git_writestream *s)
+{
+ GIT_UNUSED(s);
}
static int blob_content_to_file(
@@ -1411,36 +1410,83 @@ static int blob_content_to_file(
struct stat *st,
git_blob *blob,
const char *path,
- const char * hint_path,
+ const char *hint_path,
mode_t entry_filemode)
{
+ int flags = data->opts.file_open_flags;
mode_t file_mode = data->opts.file_mode ?
data->opts.file_mode : entry_filemode;
- git_buf out = GIT_BUF_INIT;
+ git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
+ struct checkout_stream writer;
+ mode_t mode;
git_filter_list *fl = NULL;
+ int fd;
int error = 0;
if (hint_path == NULL)
hint_path = path;
- if (!data->opts.disable_filters)
- error = git_filter_list__load_with_attr_session(
- &fl, data->repo, &data->attr_session, blob, hint_path,
- GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT);
+ if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
+ return error;
- if (!error)
- error = git_filter_list_apply_to_blob(&out, fl, blob);
+ if (flags <= 0)
+ flags = O_CREAT | O_TRUNC | O_WRONLY;
+ if (!(mode = file_mode))
+ mode = GIT_FILEMODE_BLOB;
+
+ if ((fd = p_open(path, flags, mode)) < 0) {
+ giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
+ return fd;
+ }
+
+ filter_opts.attr_session = &data->attr_session;
+ filter_opts.temp_buf = &data->tmp;
+
+ if (!data->opts.disable_filters &&
+ (error = git_filter_list__load_ext(
+ &fl, data->repo, blob, hint_path,
+ GIT_FILTER_TO_WORKTREE, &filter_opts)))
+ return error;
+
+ /* setup the writer */
+ memset(&writer, 0, sizeof(struct checkout_stream));
+ writer.base.write = checkout_stream_write;
+ writer.base.close = checkout_stream_close;
+ writer.base.free = checkout_stream_free;
+ writer.path = path;
+ writer.fd = fd;
+ writer.open = 1;
+
+ error = git_filter_list_stream_blob(fl, blob, (git_writestream *)&writer);
+
+ assert(writer.open == 0);
git_filter_list_free(fl);
- if (!error) {
- error = buffer_to_file(data, st, &out, path, file_mode);
- st->st_mode = entry_filemode;
+ if (error < 0)
+ return error;
+
+ if (GIT_PERMS_IS_EXEC(mode)) {
+ data->perfdata.chmod_calls++;
+
+ if ((error = p_chmod(path, mode)) < 0) {
+ giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
+ return error;
+ }
+ }
+
+ if (st) {
+ data->perfdata.stat_calls++;
- git_buf_free(&out);
+ if ((error = p_stat(path, st)) < 0) {
+ giterr_set(GITERR_OS, "Error statting '%s'", path);
+ return error;
+ }
+
+ st->st_mode = entry_filemode;
}
- return error;
+ return 0;
}
static int blob_content_to_link(
@@ -1958,6 +2004,7 @@ static int checkout_write_merge(
git_merge_file_result result = {0};
git_filebuf output = GIT_FILEBUF_INIT;
git_filter_list *fl = NULL;
+ git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
int error = 0;
if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3)
@@ -2007,9 +2054,12 @@ static int checkout_write_merge(
in_data.ptr = (char *)result.ptr;
in_data.size = result.len;
- if ((error = git_filter_list__load_with_attr_session(
- &fl, data->repo, &data->attr_session, NULL, git_buf_cstr(&path_workdir),
- GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT)) < 0 ||
+ filter_opts.attr_session = &data->attr_session;
+ filter_opts.temp_buf = &data->tmp;
+
+ if ((error = git_filter_list__load_ext(
+ &fl, data->repo, NULL, git_buf_cstr(&path_workdir),
+ GIT_FILTER_TO_WORKTREE, &filter_opts)) < 0 ||
(error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0)
goto done;
} else {
diff --git a/src/crlf.c b/src/crlf.c
index c0a73990f..b5d1dbf32 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -302,7 +302,7 @@ static int crlf_check(
return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
- if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) &&
+ if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
ca.safe_crlf = GIT_SAFE_CRLF_WARN;
}
diff --git a/src/diff.c b/src/diff.c
index 07eae03e7..815351b21 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -600,7 +600,7 @@ int git_diff__oid_for_entry(
error = -1;
} else if (!(error = git_filter_list_load(
&fl, diff->repo, NULL, entry.path,
- GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)))
+ GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)))
{
int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0)
diff --git a/src/diff_driver.c b/src/diff_driver.c
index 7313ab573..049e6ef2a 100644
--- a/src/diff_driver.c
+++ b/src/diff_driver.c
@@ -418,14 +418,13 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len)
{
- git_buf search;
-
- search.ptr = (char *)content;
- search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
- search.asize = 0;
+ git_buf search = GIT_BUF_INIT;
GIT_UNUSED(driver);
+ git_buf_attach_notowned(&search, content,
+ min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));
+
/* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
diff --git a/src/diff_file.c b/src/diff_file.c
index 96be0942b..f7061ae83 100644
--- a/src/diff_file.c
+++ b/src/diff_file.c
@@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file(
if ((error = git_filter_list_load(
&fl, fc->repo, NULL, fc->file->path,
- GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)) < 0)
+ GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
goto cleanup;
/* if there are no filters, try to mmap the file */
diff --git a/src/filter.c b/src/filter.c
index 7b54a76c0..4fbf84f6a 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -23,7 +23,7 @@ struct git_filter_source {
git_oid oid; /* zero if unknown (which is likely) */
uint16_t filemode; /* zero if unknown */
git_filter_mode_t mode;
- uint32_t options;
+ uint32_t flags;
};
typedef struct {
@@ -34,6 +34,7 @@ typedef struct {
struct git_filter_list {
git_array_t(git_filter_entry) filters;
git_filter_source source;
+ git_buf *temp_buf;
char path[GIT_FLEX_ARRAY];
};
@@ -371,9 +372,9 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src)
return src->mode;
}
-uint32_t git_filter_source_options(const git_filter_source *src)
+uint32_t git_filter_source_flags(const git_filter_source *src)
{
- return src->options;
+ return src->flags;
}
static int filter_list_new(
@@ -393,7 +394,7 @@ static int filter_list_new(
fl->source.repo = src->repo;
fl->source.path = fl->path;
fl->source.mode = src->mode;
- fl->source.options = src->options;
+ fl->source.flags = src->flags;
*out = fl;
return 0;
@@ -448,24 +449,23 @@ int git_filter_list_new(
git_filter_list **out,
git_repository *repo,
git_filter_mode_t mode,
- uint32_t options)
+ uint32_t flags)
{
git_filter_source src = { 0 };
src.repo = repo;
src.path = NULL;
src.mode = mode;
- src.options = options;
+ src.flags = flags;
return filter_list_new(out, &src);
}
-int git_filter_list__load_with_attr_session(
+int git_filter_list__load_ext(
git_filter_list **filters,
git_repository *repo,
- git_attr_session *attr_session,
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
- uint32_t options)
+ git_filter_options *filter_opts)
{
int error = 0;
git_filter_list *fl = NULL;
@@ -480,7 +480,8 @@ int git_filter_list__load_with_attr_session(
src.repo = repo;
src.path = path;
src.mode = mode;
- src.options = options;
+ src.flags = filter_opts->flags;
+
if (blob)
git_oid_cpy(&src.oid, git_blob_id(blob));
@@ -493,7 +494,7 @@ int git_filter_list__load_with_attr_session(
if (fdef->nattrs > 0) {
error = filter_list_check_attributes(
- &values, repo, attr_session, fdef, &src);
+ &values, repo, filter_opts->attr_session, fdef, &src);
if (error == GIT_ENOTFOUND) {
error = 0;
@@ -516,8 +517,12 @@ int git_filter_list__load_with_attr_session(
else if (error < 0)
break;
else {
- if (!fl && (error = filter_list_new(&fl, &src)) < 0)
- return error;
+ if (!fl) {
+ if ((error = filter_list_new(&fl, &src)) < 0)
+ return error;
+
+ fl->temp_buf = filter_opts->temp_buf;
+ }
fe = git_array_alloc(fl->filters);
GITERR_CHECK_ALLOC(fe);
@@ -542,10 +547,14 @@ int git_filter_list_load(
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
- uint32_t options)
+ uint32_t flags)
{
- return git_filter_list__load_with_attr_session(
- filters, repo, NULL, blob, path, mode, options);
+ git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
+
+ filter_opts.flags = flags;
+
+ return git_filter_list__load_ext(
+ filters, repo, blob, path, mode, &filter_opts);
}
void git_filter_list_free(git_filter_list *fl)
@@ -600,84 +609,72 @@ size_t git_filter_list_length(const git_filter_list *fl)
return fl ? git_array_size(fl->filters) : 0;
}
-static int filter_list_out_buffer_from_raw(
- git_buf *out, const void *ptr, size_t size)
+struct buf_stream {
+ git_writestream parent;
+ git_buf *target;
+ bool complete;
+};
+
+static int buf_stream_write(
+ git_writestream *s, const char *buffer, size_t len)
{
- if (git_buf_is_allocated(out))
- git_buf_free(out);
+ struct buf_stream *buf_stream = (struct buf_stream *)s;
+ assert(buf_stream);
- if (!size) {
- git_buf_init(out, 0);
- } else {
- out->ptr = (char *)ptr;
- out->asize = 0;
- out->size = size;
- }
+ assert(buf_stream->complete == 0);
- return 0;
+ return git_buf_put(buf_stream->target, buffer, len);
}
-int git_filter_list_apply_to_data(
- git_buf *tgt, git_filter_list *fl, git_buf *src)
+static int buf_stream_close(git_writestream *s)
{
- int error = 0;
- uint32_t i;
- git_buf *dbuffer[2], local = GIT_BUF_INIT;
- unsigned int si = 0;
+ struct buf_stream *buf_stream = (struct buf_stream *)s;
+ assert(buf_stream);
- git_buf_sanitize(tgt);
- git_buf_sanitize(src);
+ assert(buf_stream->complete == 0);
+ buf_stream->complete = 1;
- if (!fl)
- return filter_list_out_buffer_from_raw(tgt, src->ptr, src->size);
+ return 0;
+}
- dbuffer[0] = src;
- dbuffer[1] = tgt;
+static void buf_stream_free(git_writestream *s)
+{
+ GIT_UNUSED(s);
+}
- /* if `src` buffer is reallocable, then use it, otherwise copy it */
- if (!git_buf_is_allocated(src)) {
- if (git_buf_set(&local, src->ptr, src->size) < 0)
- return -1;
- dbuffer[0] = &local;
- }
+static void buf_stream_init(struct buf_stream *writer, git_buf *target)
+{
+ memset(writer, 0, sizeof(struct buf_stream));
- for (i = 0; i < git_array_size(fl->filters); ++i) {
- unsigned int di = 1 - si;
- uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_WORKTREE) ?
- i : git_array_size(fl->filters) - 1 - i;
- git_filter_entry *fe = git_array_get(fl->filters, fidx);
-
- dbuffer[di]->size = 0;
-
- /* Apply the filter from dbuffer[src] to the other buffer;
- * if the filtering is canceled by the user mid-filter,
- * we skip to the next filter without changing the source
- * of the double buffering (so that the text goes through
- * cleanly).
- */
+ writer->parent.write = buf_stream_write;
+ writer->parent.close = buf_stream_close;
+ writer->parent.free = buf_stream_free;
+ writer->target = target;
- error = fe->filter->apply(
- fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source);
+ git_buf_clear(target);
+}
- if (error == GIT_PASSTHROUGH) {
- /* PASSTHROUGH means filter decided not to process the buffer */
- error = 0;
- } else if (!error) {
- git_buf_sanitize(dbuffer[di]); /* force NUL termination */
- si = di; /* swap buffers */
- } else {
- tgt->size = 0;
- goto cleanup;
- }
+int git_filter_list_apply_to_data(
+ git_buf *tgt, git_filter_list *filters, git_buf *src)
+{
+ struct buf_stream writer;
+ int error;
+
+ git_buf_sanitize(tgt);
+ git_buf_sanitize(src);
+
+ if (!filters) {
+ git_buf_attach_notowned(tgt, src->ptr, src->size);
+ return 0;
}
- /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
- if (si != 1)
- git_buf_swap(dbuffer[0], dbuffer[1]);
+ buf_stream_init(&writer, tgt);
-cleanup:
- git_buf_free(&local); /* don't leak if we allocated locally */
+ if ((error = git_filter_list_stream_data(filters, src,
+ (git_writestream *)&writer)) < 0)
+ return error;
+ assert(writer.complete);
return error;
}
@@ -687,28 +684,21 @@ int git_filter_list_apply_to_file(
git_repository *repo,
const char *path)
{
+ struct buf_stream writer;
int error;
- const char *base = repo ? git_repository_workdir(repo) : NULL;
- git_buf abspath = GIT_BUF_INIT, raw = GIT_BUF_INIT;
- if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) &&
- !(error = git_futils_readbuffer(&raw, abspath.ptr)))
- {
- error = git_filter_list_apply_to_data(out, filters, &raw);
+ buf_stream_init(&writer, out);
- git_buf_free(&raw);
- }
+ if ((error = git_filter_list_stream_file(
+ filters, repo, path, (git_writestream *)&writer)) < 0)
+ return error;
- git_buf_free(&abspath);
+ assert(writer.complete);
return error;
}
-int git_filter_list_apply_to_blob(
- git_buf *out,
- git_filter_list *filters,
- git_blob *blob)
+static int buf_from_blob(git_buf *out, git_blob *blob)
{
- git_buf in = GIT_BUF_INIT;
git_off_t rawsize = git_blob_rawsize(blob);
if (!git__is_sizet(rawsize)) {
@@ -716,12 +706,250 @@ int git_filter_list_apply_to_blob(
return -1;
}
- in.ptr = (char *)git_blob_rawcontent(blob);
- in.asize = 0;
- in.size = (size_t)rawsize;
+ git_buf_attach_notowned(out, git_blob_rawcontent(blob), (size_t)rawsize);
+ return 0;
+}
+
+int git_filter_list_apply_to_blob(
+ git_buf *out,
+ git_filter_list *filters,
+ git_blob *blob)
+{
+ struct buf_stream writer;
+ int error;
+
+ buf_stream_init(&writer, out);
+
+ if ((error = git_filter_list_stream_blob(
+ filters, blob, (git_writestream *)&writer)) < 0)
+ return error;
+
+ assert(writer.complete);
+ return error;
+}
+
+struct proxy_stream {
+ git_writestream parent;
+ git_filter *filter;
+ const git_filter_source *source;
+ void **payload;
+ git_buf input;
+ git_buf temp_buf;
+ git_buf *output;
+ git_writestream *target;
+};
+
+static int proxy_stream_write(
+ git_writestream *s, const char *buffer, size_t len)
+{
+ struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
+ assert(proxy_stream);
+
+ return git_buf_put(&proxy_stream->input, buffer, len);
+}
+
+static int proxy_stream_close(git_writestream *s)
+{
+ struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
+ git_buf *writebuf;
+ int error;
+
+ assert(proxy_stream);
+
+ error = proxy_stream->filter->apply(
+ proxy_stream->filter,
+ proxy_stream->payload,
+ proxy_stream->output,
+ &proxy_stream->input,
+ proxy_stream->source);
+
+ if (error == GIT_PASSTHROUGH) {
+ writebuf = &proxy_stream->input;
+ } else if (error == 0) {
+ git_buf_sanitize(proxy_stream->output);
+ writebuf = proxy_stream->output;
+ } else {
+ return error;
+ }
+
+ if ((error = proxy_stream->target->write(
+ proxy_stream->target, writebuf->ptr, writebuf->size)) == 0)
+ error = proxy_stream->target->close(proxy_stream->target);
+
+ return error;
+}
+
+static void proxy_stream_free(git_writestream *s)
+{
+ struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
+ assert(proxy_stream);
+
+ git_buf_free(&proxy_stream->input);
+ git_buf_free(&proxy_stream->temp_buf);
+ git__free(proxy_stream);
+}
+
+static int proxy_stream_init(
+ git_writestream **out,
+ git_filter *filter,
+ git_buf *temp_buf,
+ void **payload,
+ const git_filter_source *source,
+ git_writestream *target)
+{
+ struct proxy_stream *proxy_stream = git__calloc(1, sizeof(struct proxy_stream));
+ GITERR_CHECK_ALLOC(proxy_stream);
+
+ proxy_stream->parent.write = proxy_stream_write;
+ proxy_stream->parent.close = proxy_stream_close;
+ proxy_stream->parent.free = proxy_stream_free;
+ proxy_stream->filter = filter;
+ proxy_stream->payload = payload;
+ proxy_stream->source = source;
+ proxy_stream->target = target;
+ proxy_stream->output = temp_buf ? temp_buf : &proxy_stream->temp_buf;
+
+ *out = (git_writestream *)proxy_stream;
+ return 0;
+}
+
+static int stream_list_init(
+ git_writestream **out,
+ git_vector *streams,
+ git_filter_list *filters,
+ git_writestream *target)
+{
+ git_writestream *last_stream = target;
+ size_t i;
+ int error = 0;
+
+ *out = NULL;
+
+ if (!filters) {
+ *out = target;
+ return 0;
+ }
+
+ /* Create filters last to first to get the chaining direction */
+ for (i = 0; i < git_array_size(filters->filters); ++i) {
+ size_t filter_idx = (filters->source.mode == GIT_FILTER_TO_WORKTREE) ?
+ git_array_size(filters->filters) - 1 - i : i;
+ git_filter_entry *fe = git_array_get(filters->filters, filter_idx);
+ git_writestream *filter_stream;
+
+ assert(fe->filter->stream || fe->filter->apply);
+
+ /* If necessary, create a stream that proxies the traditional
+ * application.
+ */
+ if (fe->filter->stream)
+ error = fe->filter->stream(&filter_stream, fe->filter,
+ &fe->payload, &filters->source, last_stream);
+ else
+ /* Create a stream that proxies the one-shot apply */
+ error = proxy_stream_init(&filter_stream, fe->filter,
+ filters->temp_buf, &fe->payload, &filters->source,
+ last_stream);
+
+ if (error < 0)
+ return error;
+
+ git_vector_insert(streams, filter_stream);
+ last_stream = filter_stream;
+ }
+
+ *out = last_stream;
+ return 0;
+}
+
+void stream_list_free(git_vector *streams)
+{
+ git_writestream *stream;
+ size_t i;
+
+ git_vector_foreach(streams, i, stream)
+ stream->free(stream);
+ git_vector_free(streams);
+}
+
+#define STREAM_BUFSIZE 10240
+
+/* TODO: maybe not use filter_stream as a target but create one */
+int git_filter_list_stream_file(
+ git_filter_list *filters,
+ git_repository *repo,
+ const char *path,
+ git_writestream *target)
+{
+ char buf[STREAM_BUFSIZE];
+ git_buf abspath = GIT_BUF_INIT;
+ const char *base = repo ? git_repository_workdir(repo) : NULL;
+ git_vector filter_streams = GIT_VECTOR_INIT;
+ git_writestream *stream_start;
+ ssize_t readlen;
+ int fd, error;
+
+ if ((error = stream_list_init(
+ &stream_start, &filter_streams, filters, target)) < 0 ||
+ (error = git_path_join_unrooted(&abspath, path, base, NULL)) < 0)
+ goto done;
+
+ if ((fd = git_futils_open_ro(path)) < 0) {
+ error = fd;
+ goto done;
+ }
+
+ while ((readlen = p_read(fd, buf, STREAM_BUFSIZE)) > 0) {
+ if ((error = stream_start->write(stream_start, buf, readlen)) < 0)
+ goto done;
+ }
+
+ if (!readlen)
+ error = stream_start->close(stream_start);
+ else if (readlen < 0)
+ error = readlen;
+
+ p_close(fd);
+
+done:
+ stream_list_free(&filter_streams);
+ git_buf_free(&abspath);
+ return error;
+}
+
+int git_filter_list_stream_data(
+ git_filter_list *filters,
+ git_buf *data,
+ git_writestream *target)
+{
+ git_vector filter_streams = GIT_VECTOR_INIT;
+ git_writestream *stream_start;
+ int error = 0;
+
+ git_buf_sanitize(data);
+
+ if ((error = stream_list_init(
+ &stream_start, &filter_streams, filters, target)) == 0 &&
+ (error =
+ stream_start->write(stream_start, data->ptr, data->size)) == 0)
+ error = stream_start->close(stream_start);
+
+ stream_list_free(&filter_streams);
+ return error;
+}
+
+int git_filter_list_stream_blob(
+ git_filter_list *filters,
+ git_blob *blob,
+ git_writestream *target)
+{
+ git_buf in = GIT_BUF_INIT;
+
+ if (buf_from_blob(&in, blob) < 0)
+ return -1;
if (filters)
git_oid_cpy(&filters->source.oid, git_blob_id(blob));
- return git_filter_list_apply_to_data(out, filters, &in);
+ return git_filter_list_stream_data(filters, &in, target);
}
diff --git a/src/filter.h b/src/filter.h
index 390ffebad..5062afba5 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -24,16 +24,23 @@ typedef enum {
GIT_CRLF_AUTO,
} git_crlf_t;
+typedef struct {
+ git_attr_session *attr_session;
+ git_buf *temp_buf;
+ uint32_t flags;
+} git_filter_options;
+
+#define GIT_FILTER_OPTIONS_INIT {0}
+
extern void git_filter_free(git_filter *filter);
-extern int git_filter_list__load_with_attr_session(
+extern int git_filter_list__load_ext(
git_filter_list **filters,
git_repository *repo,
- git_attr_session *attr_session,
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
- uint32_t options);
+ git_filter_options *filter_opts);
/*
* Available filters
diff --git a/src/repository.c b/src/repository.c
index c9275078f..23c99b0f0 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -1849,7 +1849,7 @@ int git_repository_hashfile(
if (strlen(as_path) > 0) {
error = git_filter_list_load(
&fl, repo, NULL, as_path,
- GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
+ GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
return error;
} else {
diff --git a/tests/filter/crlf.c b/tests/filter/crlf.c
index a31dac965..406d3b6b0 100644
--- a/tests/filter/crlf.c
+++ b/tests/filter/crlf.c
@@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void)
cl_repo_set_bool(g_repo, "core.safecrlf", true);
cl_git_pass(git_filter_list_new(
- &fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE));
+ &fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE));
crlf = git_filter_lookup(GIT_FILTER_CRLF);
cl_assert(crlf != NULL);
diff --git a/tests/filter/stream.c b/tests/filter/stream.c
new file mode 100644
index 000000000..603f19494
--- /dev/null
+++ b/tests/filter/stream.c
@@ -0,0 +1,221 @@
+#include "clar_libgit2.h"
+#include "posix.h"
+#include "blob.h"
+#include "filter.h"
+#include "buf_text.h"
+#include "git2/sys/filter.h"
+#include "git2/sys/repository.h"
+
+static git_repository *g_repo = NULL;
+
+static git_filter *create_compress_filter(void);
+static git_filter *compress_filter;
+
+/*
+ * Per-test setup: create the test "compress" filter, register it under the
+ * name "compress" (third argument 50), then open the "empty_standard_repo"
+ * sandbox and keep it in g_repo.
+ */
+void test_filter_stream__initialize(void)
+{
+ compress_filter = create_compress_filter();
+
+ cl_git_pass(git_filter_register("compress", compress_filter, 50));
+ g_repo = cl_git_sandbox_init("empty_standard_repo");
+}
+
+/*
+ * Per-test teardown: tear down the sandbox, clear g_repo, then unregister
+ * the "compress" filter.
+ * NOTE(review): the filter object is presumably released through its
+ * `shutdown` callback during unregistration -- confirm.
+ */
+void test_filter_stream__cleanup(void)
+{
+ cl_git_sandbox_cleanup();
+ g_repo = NULL;
+
+ git_filter_unregister("compress");
+}
+
+#define CHUNKSIZE 10240
+
+/*
+ * State of one instance of the test "compress" stream.  `parent` is the
+ * first member because the callbacks cast between `git_writestream *` and
+ * `struct compress_stream *`.
+ */
+struct compress_stream {
+ /* callback table (write/close/free) filled in by compress_filter_stream_init */
+ git_writestream parent;
+ /* downstream stream that receives this stream's output */
+ git_writestream *next;
+ /* filter direction; GIT_FILTER_TO_ODB selects the "deflate" path */
+ git_filter_mode_t mode;
+ /* deflate only: first byte of the chunk currently being consumed */
+ char current;
+ /* deflate only: number of bytes of the current chunk consumed so far */
+ size_t current_chunk;
+};
+
+/*
+ * "Deflate" direction: every CHUNKSIZE bytes of input collapse into a single
+ * output byte, namely the first byte of that chunk.  Progress through the
+ * current chunk lives in the stream, so a chunk may span several calls.
+ *
+ * Always returns 0; a failing downstream write is reported via cl_git_pass.
+ */
+static int compress_stream_write__deflated(struct compress_stream *stream, const char *buffer, size_t len)
+{
+	size_t offset = 0, remaining = len;
+
+	while (remaining != 0) {
+		size_t room, take;
+
+		/* a new chunk begins here: remember its first byte */
+		if (!stream->current_chunk)
+			stream->current = buffer[offset];
+
+		/* consume as much of the input as still fits in this chunk */
+		room = CHUNKSIZE - stream->current_chunk;
+		take = (room < remaining) ? room : remaining;
+
+		offset += take;
+		remaining -= take;
+		stream->current_chunk += take;
+
+		if (stream->current_chunk != CHUNKSIZE)
+			continue;
+
+		/* chunk complete: emit its one representative byte downstream */
+		cl_git_pass(stream->next->write(stream->next, &stream->current, 1));
+		stream->current_chunk = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * "Inflate" direction: each input byte is expanded into CHUNKSIZE copies of
+ * itself, and each expanded chunk is written to the next stream.
+ *
+ * Always returns 0; a failing downstream write is reported via cl_git_pass.
+ */
+static int compress_stream_write__inflated(struct compress_stream *stream, const char *buffer, size_t len)
+{
+	char expanded[CHUNKSIZE];
+	size_t pos;
+
+	for (pos = 0; pos < len; ++pos) {
+		const char fill = buffer[pos];
+		size_t k = CHUNKSIZE;
+
+		/* fill the whole scratch chunk with the current input byte */
+		while (k-- > 0)
+			expanded[k] = fill;
+
+		cl_git_pass(stream->next->write(stream->next, expanded, CHUNKSIZE));
+	}
+
+	return 0;
+}
+
+/*
+ * `write` callback of the compress stream: dispatches on the filter
+ * direction -- deflate when writing to the ODB, inflate otherwise -- and
+ * returns whatever the selected helper returns.
+ */
+static int compress_stream_write(git_writestream *s, const char *buffer, size_t len)
+{
+	struct compress_stream *self = (struct compress_stream *)s;
+
+	if (self->mode == GIT_FILTER_TO_ODB)
+		return compress_stream_write__deflated(self, buffer, len);
+
+	return compress_stream_write__inflated(self, buffer, len);
+}
+
+/*
+ * `close` callback of the compress stream.
+ *
+ * Asserts that no partially consumed chunk is pending (the test only feeds
+ * whole multiples of CHUNKSIZE), then closes the downstream stream.
+ *
+ * Returns the downstream stream's close result so that a failure further
+ * down the chain is not silently reported as success.
+ */
+static int compress_stream_close(git_writestream *s)
+{
+	struct compress_stream *stream = (struct compress_stream *)s;
+
+	cl_assert_equal_i(0, stream->current_chunk);
+
+	return stream->next->close(stream->next);
+}
+
+/*
+ * `free` callback of the compress stream: releases the stream object that
+ * compress_filter_stream_init allocated with git__calloc.
+ */
+static void compress_stream_free(git_writestream *stream)
+{
+ git__free(stream);
+}
+
+/*
+ * `stream` callback of the compress filter: allocates a zeroed
+ * compress_stream, wires up its write/close/free callbacks, records the
+ * downstream stream and the filter direction taken from `src`, and hands the
+ * new stream back through `out`.
+ *
+ * `self` and `payload` are unused.  Always returns 0; allocation failure is
+ * caught by cl_assert.
+ */
+static int compress_filter_stream_init(
+	git_writestream **out,
+	git_filter *self,
+	void **payload,
+	const git_filter_source *src,
+	git_writestream *next)
+{
+	struct compress_stream *cs;
+
+	GIT_UNUSED(self);
+	GIT_UNUSED(payload);
+
+	cs = git__calloc(1, sizeof(*cs));
+	cl_assert(cs);
+
+	cs->mode = git_filter_source_mode(src);
+	cs->next = next;
+
+	cs->parent.free = compress_stream_free;
+	cs->parent.close = compress_stream_close;
+	cs->parent.write = compress_stream_write;
+
+	/* `parent` is the first member, so this is the address of `cs` itself */
+	*out = &cs->parent;
+	return 0;
+}
+
+/*
+ * `shutdown` callback of the compress filter: releases the filter object
+ * allocated by create_compress_filter.
+ */
+static void compress_filter_free(git_filter *f)
+{
+ git__free(f);
+}
+
+/*
+ * Allocates and initializes the test "compress" filter.
+ *
+ * The filter only provides the streaming entry point
+ * (compress_filter_stream_init) and a shutdown callback that frees it; its
+ * attribute string is "+compress".
+ *
+ * Declared `static` to match the forward declaration at the top of the file.
+ * Returns the new filter; allocation failure is caught by cl_assert.
+ */
+static git_filter *create_compress_filter(void)
+{
+	git_filter *filter = git__calloc(1, sizeof(*filter));
+	cl_assert(filter);
+
+	filter->version = GIT_FILTER_VERSION;
+	filter->attributes = "+compress";
+	filter->stream = compress_filter_stream_init;
+	filter->shutdown = compress_filter_free;
+
+	return filter;
+}
+
+/*
+ * Writes `numchunks` chunks of CHUNKSIZE bytes to `filename` inside the
+ * "empty_standard_repo" sandbox.  Every byte of chunk number i has the value
+ * i % 256, so each chunk is uniform.
+ *
+ * Any failure (path join, open, write, close) fails the running test.
+ */
+static void writefile(const char *filename, size_t numchunks)
+{
+	git_buf path = GIT_BUF_INIT;
+	char buf[CHUNKSIZE];
+	size_t i, j;
+	int fd;
+
+	cl_git_pass(git_buf_joinpath(&path, "empty_standard_repo", filename));
+
+	fd = p_open(path.ptr, O_RDWR|O_CREAT, 0666);
+	cl_assert(fd >= 0);
+
+	for (i = 0; i < numchunks; i++) {
+		for (j = 0; j < CHUNKSIZE; j++)
+			buf[j] = (char)(i % 256);
+
+		cl_git_pass(p_write(fd, buf, CHUNKSIZE));
+	}
+
+	/* a deferred write error can surface at close time; do not ignore it */
+	cl_must_pass(p_close(fd));
+
+	git_buf_free(&path);
+}
+
+/*
+ * Round-trips a file of `numchunks` chunks (CHUNKSIZE bytes each) through
+ * the compress filter:
+ *  - marks every path with the `compress` attribute via .gitattributes,
+ *  - writes the file and adds it to the index, asserting that the stored
+ *    blob is `numchunks` bytes long,
+ *  - deletes the working copy, force-checks-out the index and asserts that
+ *    the recreated file is numchunks * CHUNKSIZE bytes long.
+ */
+static void test_stream(size_t numchunks)
+{
+ git_index *index;
+ const git_index_entry *entry;
+ git_blob *blob;
+ struct stat st;
+ git_checkout_options checkout_opts = GIT_CHECKOUT_OPTIONS_INIT;
+
+ checkout_opts.checkout_strategy = GIT_CHECKOUT_FORCE;
+
+ cl_git_mkfile(
+ "empty_standard_repo/.gitattributes",
+ "* compress\n");
+
+ /* write a file to disk */
+ writefile("streamed_file", numchunks);
+
+ /* place it in the index */
+ cl_git_pass(git_repository_index(&index, g_repo));
+ cl_git_pass(git_index_add_bypath(index, "streamed_file"));
+ cl_git_pass(git_index_write(index));
+
+ /* ensure it was appropriately compressed */
+ cl_assert(entry = git_index_get_bypath(index, "streamed_file", 0));
+
+ cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id));
+ /* one output byte per input chunk is expected */
+ cl_assert_equal_i(numchunks, git_blob_rawsize(blob));
+
+ /* check the file back out */
+ cl_must_pass(p_unlink("empty_standard_repo/streamed_file"));
+ cl_git_pass(git_checkout_index(g_repo, index, &checkout_opts));
+
+ /* ensure it was decompressed */
+ cl_must_pass(p_stat("empty_standard_repo/streamed_file", &st));
+ cl_assert_equal_sz((numchunks * CHUNKSIZE), st.st_size);
+
+ git_index_free(index);
+ git_blob_free(blob);
+}
+
+/*
+ * Round-trip a 50 KiB file (5 chunks of CHUNKSIZE = 10240 bytes) through the
+ * "compression" stream.
+ */
+void test_filter_stream__smallfile(void)
+{
+ test_stream(5);
+}
+
+/*
+ * Round-trip a 500 MiB file (51200 chunks of CHUNKSIZE = 10240 bytes) through
+ * the "compression" stream.  Skipped when cl_getenv() finds no
+ * GITTEST_INVASIVE_FS_SIZE value.
+ */
+void test_filter_stream__bigfile(void)
+{
+ if (!cl_getenv("GITTEST_INVASIVE_FS_SIZE"))
+ cl_skip();
+
+ test_stream(51200);
+}
diff --git a/tests/repo/init.c b/tests/repo/init.c
index 91747c9f5..076156817 100644
--- a/tests/repo/init.c
+++ b/tests/repo/init.c
@@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void)
git_buf root = GIT_BUF_INIT;
int root_len;
- if (!cl_getenv("GITTEST_INVASIVE_FILESYSTEM"))
+ if (!cl_getenv("GITTEST_INVASIVE_FS_STRUCTURE"))
cl_skip();
root_len = git_path_root(sandbox);