summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicent Martí <tanoku@gmail.com>2012-03-01 21:19:51 +0100
committerVicent Martí <tanoku@gmail.com>2012-03-01 21:19:51 +0100
commit47a899ffed3c71080e10e73eda092a716f1be168 (patch)
tree181f4f0b52318fbe65fbd9aa3079593533d45715
parent788430c8e3fa90dd965b44fb31ba8b2eece2ca37 (diff)
downloadlibgit2-47a899ffed3c71080e10e73eda092a716f1be168.tar.gz
filter: Beautiful refactoring
Comments soothe my soul.
-rw-r--r--src/blob.c34
-rw-r--r--src/crlf.c116
-rw-r--r--src/filter.c116
-rw-r--r--src/filter.h84
4 files changed, 235 insertions, 115 deletions
diff --git a/src/blob.c b/src/blob.c
index 245326157..e1f4a7f6a 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -115,19 +115,18 @@ static int write_file_filtered(
if (error < GIT_SUCCESS)
return error;
- error = git_filter__apply(&dest, &source, filters);
+ error = git_filters_apply(&dest, &source, filters);
- if (error < GIT_SUCCESS) {
- git_buf_free(&source);
- git_buf_free(&dest);
- return error;
- }
+ /* Free the source as soon as possible. This can be big in memory,
+ * and we don't want to ODB write to choke */
+ git_buf_free(&source);
- error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
+ if (error == GIT_SUCCESS) {
+ /* Write the file to disk if it was properly filtered */
+ error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
+ }
- git_buf_free(&source);
git_buf_free(&dest);
-
return GIT_SUCCESS;
}
@@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
} else {
git_vector write_filters = GIT_VECTOR_INIT;
+ int filter_count;
- if ((error = git_filter__load_for_file(
- &write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
- goto cleanup;
+ /* Load the filters for writing this file to the ODB */
+ filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB);
- if (write_filters.length == 0) {
+ if (filter_count < 0) {
+ /* Negative value means there was a critical error */
+ error = filter_count;
+ goto cleanup;
+ } else if (filter_count == 0) {
+ /* No filters need to be applied to the document: we can stream
+ * directly from disk */
error = write_file_stream(oid, odb, full_path.ptr, size);
} else {
+ /* We need to apply one or more filters */
error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
}
- git_filter__free(&write_filters);
+ git_filters_free(&write_filters);
/*
* TODO: eventually support streaming filtered files, for files which are bigger
diff --git a/src/crlf.c b/src/crlf.c
index d8dd1c382..feaa687ee 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
return error;
}
-static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
+static int drop_crlf(git_buf *dest, const git_buf *source)
{
+ size_t psize = source->size - 1;
size_t i = 0;
+
+ /* Initial scan: see if we can reach the end of the document
+ * without finding a single carriage return */
+ while (i < psize && source->ptr[i] != '\r')
+ i++;
+
+ /* Clean file? Tell the library to skip this filter */
+ if (i == psize)
+ return -1;
+
+ /* Main scan loop. Keep moving forward until we find a carriage
+ * return, and then copy the whole chunk to the destination
+ * buffer.
+ *
+ * Note that we only scan until `size - 1`, because we cannot drop a
+ * carriage return if it's the last character in the file (what a weird
+ * file, anyway)
+ */
+ while (i < psize) {
+ size_t org = i;
+
+ while (i < psize && source->ptr[i] != '\r')
+ i++;
+
+ if (i > org)
+ git_buf_put(dest, source->ptr + org, i - org);
+
+ /* We found a carriage return. Is the next character a newline?
+ * If it is, we just keep moving. The newline will be copied
+ * to the dest in the next chunk.
+ *
+ * If it's not a newline, we need to insert the carriage return
+ * into the dest buffer, because we don't drop lone CRs.
+ */
+ if (source->ptr[i + 1] != '\n') {
+ git_buf_putc(dest, '\r');
+ }
+
+ i++;
+ }
+
+ /* Copy the last character in the file */
+ git_buf_putc(dest, source->ptr[psize]);
+ return 0;
+}
+
+static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
+{
struct crlf_filter *filter = (struct crlf_filter *)self;
assert(self && dest && source);
+ /* Empty file? Nothing to do */
+ if (source->size == 0)
+ return 0;
+
+ /* Heuristics to see if we can skip the conversion.
+ * Straight from Core Git.
+ */
if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
git_text_stats stats;
- git_text__stat(&stats, source);
+ git_text_gather_stats(&stats, source);
/*
* We're currently not going to even try to convert stuff
@@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
/*
* And add some heuristics for binary vs text, of course...
*/
- if (git_text__is_binary(&stats))
+ if (git_text_is_binary(&stats))
return -1;
#if 0
@@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
return -1;
}
- /* TODO: do not copy anything if there isn't a single CR */
- while (i < source->size) {
- size_t org = i;
-
- while (i < source->size && source->ptr[i] != '\r')
- i++;
-
- if (i > org)
- git_buf_put(dest, source->ptr + org, i - org);
-
- i++;
-
- if (i >= source->size || source->ptr[i] != '\n') {
- git_buf_putc(dest, '\r');
- }
- }
-
- return 0;
+ /* Actually drop the carriage returns */
+ return drop_crlf(dest, source);
}
-int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path)
+int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path)
{
- struct crlf_filter filter;
+ struct crlf_attrs ca;
+ struct crlf_filter *filter;
int error;
- filter.f.apply = &crlf_apply_to_odb;
- filter.f.do_free = NULL;
-
- if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0)
+ /* Load gitattributes for the path */
+ if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
return error;
- filter.attrs.crlf_action = crlf_input_action(&filter.attrs);
+ /*
+ * Use the core Git logic to see if we should perform CRLF for this file
+ * based on its attributes & the value of `core.auto_crlf`
+ */
+ ca.crlf_action = crlf_input_action(&ca);
- if (filter.attrs.crlf_action == GIT_CRLF_BINARY)
+ if (ca.crlf_action == GIT_CRLF_BINARY)
return 0;
- if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
+ if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
return 0;
- *filter_out = git__malloc(sizeof(struct crlf_filter));
- if (*filter_out == NULL)
+ /* If we're good, we create a new filter object and push it
+ * into the filters array */
+ filter = git__malloc(sizeof(struct crlf_filter));
+ if (filter == NULL)
return GIT_ENOMEM;
- memcpy(*filter_out, &filter, sizeof(struct crlf_attrs));
- return 0;
+ filter->f.apply = &crlf_apply_to_odb;
+ filter->f.do_free = NULL;
+ memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
+
+ return git_vector_insert(filters, filter);
}
diff --git a/src/filter.c b/src/filter.c
index f517512dd..92b3566af 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -13,7 +13,7 @@
#include "git2/config.h"
/* Fresh from Core Git. I wonder what we could use this for... */
-void git_text__stat(git_text_stats *stats, const git_buf *text)
+void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
@@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text)
/*
* Fresh from Core Git
*/
-int git_text__is_binary(git_text_stats *stats)
+int git_text_is_binary(git_text_stats *stats)
{
if (stats->nul)
return 1;
@@ -84,32 +84,74 @@ int git_text__is_binary(git_text_stats *stats)
return 0;
}
-int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode)
+static int load_repository_settings(git_repository *repo)
+{
+ static git_cvar_map map_eol[] = {
+ {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
+ {GIT_CVAR_STRING, "lf", GIT_EOL_LF},
+ {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF},
+ {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE}
+ };
+
+ static git_cvar_map map_crlf[] = {
+ {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE},
+ {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE},
+ {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT}
+ };
+
+ git_config *config;
+ int error;
+
+ if (repo->filter_options.loaded)
+ return GIT_SUCCESS;
+
+ repo->filter_options.eol = GIT_EOL_DEFAULT;
+ repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
+
+ error = git_repository_config__weakptr(&config, repo);
+ if (error < GIT_SUCCESS)
+ return error;
+
+ error = git_config_get_mapped(
+ config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol);
+
+ if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
+ return error;
+
+ error = git_config_get_mapped(
+ config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf);
+
+ if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
+ return error;
+
+ repo->filter_options.loaded = 1;
+ return 0;
+}
+
+int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
int error;
- git_filter *crlf_filter = NULL;
- error = git_filter__load_settings(repo);
+ /* Make sure that the relevant settings from `gitconfig` have been
+ * cached on the repository struct to speed things up */
+ error = load_repository_settings(repo);
if (error < GIT_SUCCESS)
return error;
if (mode == GIT_FILTER_TO_ODB) {
- error = git_filter__crlf_to_odb(&crlf_filter, repo, path);
+ /* Load the CRLF cleanup filter when writing to the ODB */
+ error = git_filter_add__crlf_to_odb(filters, repo, path);
if (error < GIT_SUCCESS)
return error;
-
- if (crlf_filter != NULL)
- git_vector_insert(filters, crlf_filter);
-
} else {
return git__throw(GIT_ENOTIMPLEMENTED,
"Worktree filters are not implemented yet");
}
- return 0;
+ return (int)filters->length;
}
-void git_filter__free(git_vector *filters)
+void git_filters_free(git_vector *filters)
{
size_t i;
git_filter *filter;
@@ -124,7 +166,7 @@ void git_filter__free(git_vector *filters)
git_vector_free(filters);
}
-int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
+int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
unsigned int src, dst, i;
git_buf *dbuffer[2];
@@ -134,6 +176,11 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
src = 0;
+ if (source->size == 0) {
+ git_buf_clear(dest);
+ return GIT_SUCCESS;
+ }
+
/* Pre-grow the destination buffer to more or less the size
* we expect it to have */
if (git_buf_grow(dest, source->size) < 0)
@@ -167,46 +214,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
return GIT_SUCCESS;
}
-int git_filter__load_settings(git_repository *repo)
-{
- static git_cvar_map map_eol[] = {
- {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
- {GIT_CVAR_STRING, "lf", GIT_EOL_LF},
- {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF},
- {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE}
- };
-
- static git_cvar_map map_crlf[] = {
- {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE},
- {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE},
- {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT}
- };
-
- git_config *config;
- int error;
-
- if (repo->filter_options.loaded)
- return GIT_SUCCESS;
-
- repo->filter_options.eol = GIT_EOL_DEFAULT;
- repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
-
- error = git_repository_config__weakptr(&config, repo);
- if (error < GIT_SUCCESS)
- return error;
-
- error = git_config_get_mapped(
- config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol);
-
- if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
- return error;
-
- error = git_config_get_mapped(
- config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf);
-
- if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
- return error;
-
- repo->filter_options.loaded = 1;
- return 0;
-}
diff --git a/src/filter.h b/src/filter.h
index 0cf92bd1d..601be1836 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -60,19 +60,81 @@ typedef struct {
unsigned int printable, nonprintable;
} git_text_stats;
-extern int git_filter__load_settings(git_repository *repo);
-extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
-extern void git_filter__free(git_vector *filters);
-extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters);
+/*
+ * FILTER API
+ */
+
+/*
+ * For any given path in the working directory, fill the `filters`
+ * array with the relevant filters that need to be applied.
+ *
+ * Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the
+ * filters that will be used when checking out a file to the working
+ * directory, or `GIT_FILTER_TO_ODB` for the filters used when writing
+ * a file to the ODB.
+ *
+ * @param filters Vector where to store all the loaded filters
+ * @param repo Repository object that contains `path`
+ * @param path Relative path of the file to be filtered
+ * @param mode Filtering direction (WT->ODB or ODB->WT)
+ * @return the number of filters loaded for the file (0 if the file
+ * doesn't need filtering), or a negative error code
+ */
+extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode);
+
+/*
+ * Apply one or more filters to a file.
+ *
+ * The file must have been loaded as a `git_buf` object. Both the `source`
+ * and `dest` buffers are owned by the caller and must be freed once
+ * they are no longer needed.
+ *
+ * NOTE: Because of the double-buffering schema, the `source` buffer that contains
+ * the original file may be tampered once the filtering is complete. Regardless,
+ * the `dest` buffer will always contain the final result of the filtering
+ *
+ * @param dest Buffer to store the result of the filtering
+ * @param source Buffer containing the document to filter
+ * @param filters A non-empty vector of filters as supplied by `git_filters_load`
+ * @return GIT_SUCCESS on success, an error code otherwise
+ */
+extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters);
+
+/*
+ * Free the `filters` array generated by `git_filters_load`.
+ *
+ * Note that this frees both the array and its contents. The array will
+ * be clean/reusable after this call.
+ *
+ * @param filters A filters array as supplied by `git_filters_load`
+ */
+extern void git_filters_free(git_vector *filters);
+
+/*
+ * Available filters
+ */
-/* Gather stats for a piece of text */
-extern void git_text__stat(git_text_stats *stats, const git_buf *text);
+/* Strip CRLF, from Worktree to ODB */
+extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path);
-/* Heuristics on a set of text stats to check whether it's binary
- * text or not */
-extern int git_text__is_binary(git_text_stats *stats);
-/* Available filters */
-extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path);
+/*
+ * PLAINTEXT API
+ */
+
+/*
+ * Gather stats for a piece of text
+ *
+ * Fill the `stats` structure with information on the number of
+ * unreadable characters, carriage returns, etc, so it can be
+ * used in heuristics.
+ */
+extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
+
+/*
+ * Process `git_text_stats` data generated by `git_text_stat` to see
+ * if it qualifies as a binary file
+ */
+extern int git_text_is_binary(git_text_stats *stats);
#endif