From 13224ea4aad9a1b3c9cc4c992ceaea9af623e047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Mon, 27 Feb 2012 04:28:31 +0100 Subject: buffer: Unify `git_fbuffer` and `git_buf` This makes so much sense that I can't believe it hasn't been done before. Kill the old `git_fbuffer` and read files straight into `git_buf` objects. Also: In order to fully support 4GB files in 32-bit systems, the `git_buf` implementation has been changed from using `ssize_t` for storage and storing negative values on allocation failure, to using `size_t` and changing the buffer pointer to a magical pointer on allocation failure. Hopefully this won't break anything. --- src/attr_file.c | 6 ++--- src/buffer.c | 45 ++++++++++++++++++++++----------- src/buffer.h | 9 ++++--- src/config_file.c | 22 ++++++++--------- src/fileops.c | 74 ++++++++++++++++++++++++------------------------------- src/fileops.h | 13 ++-------- src/ignore.c | 6 ++--- src/index.c | 6 ++--- src/odb.c | 7 +++--- src/odb_loose.c | 26 +++++++++---------- src/reflog.c | 6 ++--- src/refs.c | 48 ++++++++++++++++++------------------ src/repository.c | 16 ++++++------ 13 files changed, 142 insertions(+), 142 deletions(-) (limited to 'src') diff --git a/src/attr_file.c b/src/attr_file.c index 7911381ea..a1b69a5bb 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -111,7 +111,7 @@ int git_attr_file__from_file( git_repository *repo, const char *path, git_attr_file *file) { int error = GIT_SUCCESS; - git_fbuffer fbuf = GIT_FBUFFER_INIT; + git_buf fbuf = GIT_BUF_INIT; assert(path && file); @@ -120,9 +120,9 @@ int git_attr_file__from_file( if (error == GIT_SUCCESS && (error = git_futils_readbuffer(&fbuf, path)) == GIT_SUCCESS) - error = git_attr_file__from_buffer(repo, fbuf.data, file); + error = git_attr_file__from_buffer(repo, fbuf.ptr, file); - git_futils_freebuffer(&fbuf); + git_buf_free(&fbuf); if (error != GIT_SUCCESS) git__rethrow(error, "Could not open attribute file '%s'", path); diff --git a/src/buffer.c 
b/src/buffer.c index 183da7c5f..b9f62cc30 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -7,14 +7,17 @@ #include "buffer.h" #include "posix.h" #include +#include /* Used as default value for git_buf->ptr so that people can always * assume ptr is non-NULL and zero terminated even for new git_bufs. */ char git_buf_initbuf[1]; +static char git_buf__oom; + #define ENSURE_SIZE(b, d) \ - if ((ssize_t)(d) > buf->asize && git_buf_grow(b, (d)) < GIT_SUCCESS)\ + if ((d) > buf->asize && git_buf_grow(b, (d)) < GIT_SUCCESS)\ return GIT_ENOMEM; @@ -31,8 +34,10 @@ void git_buf_init(git_buf *buf, size_t initial_size) int git_buf_grow(git_buf *buf, size_t target_size) { int error = git_buf_try_grow(buf, target_size); - if (error != GIT_SUCCESS) - buf->asize = -1; + if (error != GIT_SUCCESS) { + buf->ptr = &git_buf__oom; + } + return error; } @@ -41,17 +46,17 @@ int git_buf_try_grow(git_buf *buf, size_t target_size) char *new_ptr; size_t new_size; - if (buf->asize < 0) + if (buf->ptr == &git_buf__oom) return GIT_ENOMEM; - if (target_size <= (size_t)buf->asize) + if (target_size <= buf->asize) return GIT_SUCCESS; if (buf->asize == 0) { new_size = target_size; new_ptr = NULL; } else { - new_size = (size_t)buf->asize; + new_size = buf->asize; new_ptr = buf->ptr; } @@ -64,7 +69,6 @@ int git_buf_try_grow(git_buf *buf, size_t target_size) new_size = (new_size + 7) & ~7; new_ptr = git__realloc(new_ptr, new_size); - /* if realloc fails, return without modifying the git_buf */ if (!new_ptr) return GIT_ENOMEM; @@ -83,7 +87,7 @@ void git_buf_free(git_buf *buf) { if (!buf) return; - if (buf->ptr != git_buf_initbuf) + if (buf->ptr != git_buf_initbuf && buf->ptr != &git_buf__oom) git__free(buf->ptr); git_buf_init(buf, 0); @@ -98,12 +102,12 @@ void git_buf_clear(git_buf *buf) int git_buf_oom(const git_buf *buf) { - return (buf->asize < 0); + return (buf->ptr == &git_buf__oom); } int git_buf_lasterror(const git_buf *buf) { - return (buf->asize < 0) ? 
GIT_ENOMEM : GIT_SUCCESS; + return (buf->ptr == &git_buf__oom) ? GIT_ENOMEM : GIT_SUCCESS; } int git_buf_set(git_buf *buf, const char *data, size_t len) @@ -162,11 +166,12 @@ int git_buf_printf(git_buf *buf, const char *format, ...) va_end(arglist); if (len < 0) { - buf->asize = -1; + free(buf->ptr); + buf->ptr = &git_buf__oom; return GIT_ENOMEM; } - if (len + 1 <= buf->asize - buf->size) { + if ((size_t)len + 1 <= buf->asize - buf->size) { buf->size += len; break; } @@ -205,9 +210,9 @@ void git_buf_consume(git_buf *buf, const char *end) } } -void git_buf_truncate(git_buf *buf, ssize_t len) +void git_buf_truncate(git_buf *buf, size_t len) { - if (len >= 0 && len < buf->size) { + if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } @@ -238,7 +243,7 @@ char *git_buf_detach(git_buf *buf) return data; } -void git_buf_attach(git_buf *buf, char *ptr, ssize_t asize) +void git_buf_attach(git_buf *buf, char *ptr, size_t asize) { git_buf_free(buf); @@ -372,3 +377,13 @@ int git_buf_join( return error; } + +void git_buf_rtrim(git_buf *buf) +{ + while (buf->size > 0) { + if (!isspace(buf->ptr[buf->size - 1])) + break; + + buf->size--; + } +} diff --git a/src/buffer.h b/src/buffer.h index 3969f461e..3e9cb1713 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -11,7 +11,7 @@ typedef struct { char *ptr; - ssize_t asize, size; + size_t asize, size; } git_buf; extern char git_buf_initbuf[]; @@ -47,7 +47,7 @@ int git_buf_try_grow(git_buf *buf, size_t target_size); void git_buf_free(git_buf *buf); void git_buf_swap(git_buf *buf_a, git_buf *buf_b); char *git_buf_detach(git_buf *buf); -void git_buf_attach(git_buf *buf, char *ptr, ssize_t asize); +void git_buf_attach(git_buf *buf, char *ptr, size_t asize); /** * Test if there have been any reallocation failures with this git_buf. @@ -83,7 +83,7 @@ int git_buf_puts(git_buf *buf, const char *string); int git_buf_printf(git_buf *buf, const char *format, ...) 
GIT_FORMAT_PRINTF(2, 3); void git_buf_clear(git_buf *buf); void git_buf_consume(git_buf *buf, const char *end); -void git_buf_truncate(git_buf *buf, ssize_t len); +void git_buf_truncate(git_buf *buf, size_t len); void git_buf_rtruncate_at_char(git_buf *path, char separator); int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...); @@ -115,4 +115,7 @@ GIT_INLINE(int) git_buf_rfind_next(git_buf *buf, char ch) return idx; } +/* Remove whitespace from the end of the buffer */ +void git_buf_rtrim(git_buf *buf); + #endif diff --git a/src/config_file.c b/src/config_file.c index c9c7d11eb..ce76493c7 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -73,7 +73,7 @@ typedef struct { git_hashtable *values; struct { - git_fbuffer buffer; + git_buf buffer; char *read_ptr; int line_number; int eof; @@ -151,6 +151,7 @@ static int config_open(git_config_file *cfg) if (b->values == NULL) return GIT_ENOMEM; + git_buf_init(&b->reader.buffer, 0); error = git_futils_readbuffer(&b->reader.buffer, b->file_path); /* It's fine if the file doesn't exist */ @@ -164,14 +165,14 @@ static int config_open(git_config_file *cfg) if (error < GIT_SUCCESS) goto cleanup; - git_futils_freebuffer(&b->reader.buffer); + git_buf_free(&b->reader.buffer); return GIT_SUCCESS; cleanup: free_vars(b->values); b->values = NULL; - git_futils_freebuffer(&b->reader.buffer); + git_buf_free(&b->reader.buffer); return git__rethrow(error, "Failed to open config"); } @@ -765,7 +766,7 @@ static int skip_bom(diskfile_backend *cfg) { static const char utf8_bom[] = "\xef\xbb\xbf"; - if (cfg->reader.buffer.len < sizeof(utf8_bom)) + if (cfg->reader.buffer.size < sizeof(utf8_bom)) return GIT_SUCCESS; if (memcmp(cfg->reader.read_ptr, utf8_bom, sizeof(utf8_bom)) == 0) @@ -847,7 +848,7 @@ static int config_parse(diskfile_backend *cfg_file) git_buf buf = GIT_BUF_INIT; /* Initialize the reading position */ - cfg_file->reader.read_ptr = cfg_file->reader.buffer.data; + cfg_file->reader.read_ptr = 
cfg_file->reader.buffer.ptr; cfg_file->reader.eof = 0; /* If the file is empty, there's nothing for us to do */ @@ -976,10 +977,9 @@ static int config_write(diskfile_backend *cfg, const char *key, const regex_t *p cfg->reader.read_ptr = NULL; cfg->reader.eof = 1; data_start = NULL; - cfg->reader.buffer.len = 0; - cfg->reader.buffer.data = NULL; + git_buf_clear(&cfg->reader.buffer); } else { - cfg->reader.read_ptr = cfg->reader.buffer.data; + cfg->reader.read_ptr = cfg->reader.buffer.ptr; cfg->reader.eof = 0; data_start = cfg->reader.read_ptr; } @@ -1093,7 +1093,7 @@ static int config_write(diskfile_backend *cfg, const char *key, const regex_t *p /* And then the write out rest of the file */ error = git_filebuf_write(&file, post_start, - cfg->reader.buffer.len - (post_start - data_start)); + cfg->reader.buffer.size - (post_start - data_start)); if (error < GIT_SUCCESS) { git__rethrow(error, "Failed to write the rest of the file"); @@ -1128,7 +1128,7 @@ static int config_write(diskfile_backend *cfg, const char *key, const regex_t *p goto cleanup; } - error = git_filebuf_write(&file, cfg->reader.buffer.data, cfg->reader.buffer.len); + error = git_filebuf_write(&file, cfg->reader.buffer.ptr, cfg->reader.buffer.size); if (error < GIT_SUCCESS) { git__rethrow(error, "Failed to write original config content"); goto cleanup; @@ -1155,7 +1155,7 @@ static int config_write(diskfile_backend *cfg, const char *key, const regex_t *p else error = git_filebuf_commit(&file, GIT_CONFIG_FILE_MODE); - git_futils_freebuffer(&cfg->reader.buffer); + git_buf_free(&cfg->reader.buffer); return error; } diff --git a/src/fileops.c b/src/fileops.c index 3241c68b1..d2b4af51e 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -97,87 +97,77 @@ mode_t git_futils_canonical_mode(mode_t raw_mode) return 0; } -int git_futils_readbuffer_updated(git_fbuffer *obj, const char *path, time_t *mtime, int *updated) +int git_futils_readbuffer_updated(git_buf *buf, const char *path, time_t *mtime, int *updated) { 
git_file fd; size_t len; struct stat st; - unsigned char *buff; - assert(obj && path && *path); + assert(buf && path && *path); if (updated != NULL) *updated = 0; - if (p_stat(path, &st) < 0) - return git__throw(GIT_ENOTFOUND, "Failed to stat file %s", path); + if ((fd = p_open(path, O_RDONLY)) < 0) { + return git__throw(GIT_ENOTFOUND, "Failed to read file '%s': %s", path, strerror(errno)); + } - if (S_ISDIR(st.st_mode)) - return git__throw(GIT_ERROR, "Can't read a dir into a buffer"); + if (p_fstat(fd, &st) < 0 || S_ISDIR(st.st_mode) || !git__is_sizet(st.st_size+1)) { + close(fd); + return git__throw(GIT_EOSERR, "Failed to stat file '%s'", path); + } /* * If we were given a time, we only want to read the file if it * has been modified. */ - if (mtime != NULL && *mtime >= st.st_mtime) - return GIT_SUCCESS; + if (mtime != NULL && *mtime >= st.st_mtime) { + close(fd); + return 0; + } if (mtime != NULL) *mtime = st.st_mtime; - if (!git__is_sizet(st.st_size+1)) - return git__throw(GIT_ERROR, "Failed to read file `%s`. 
An error occured while calculating its size", path); len = (size_t) st.st_size; - if ((fd = p_open(path, O_RDONLY)) < 0) - return git__throw(GIT_EOSERR, "Failed to open %s for reading", path); + git_buf_clear(buf); - if ((buff = git__malloc(len + 1)) == NULL) { - p_close(fd); + if (git_buf_grow(buf, len + 1) < 0) { + close(fd); return GIT_ENOMEM; } - if (p_read(fd, buff, len) < 0) { - p_close(fd); - git__free(buff); - return git__throw(GIT_ERROR, "Failed to read file `%s`", path); + buf->ptr[len] = '\0'; + + while (len > 0) { + ssize_t read_size = p_read(fd, buf->ptr, len); + + if (read_size < 0) { + close(fd); + return git__throw(GIT_EOSERR, "Failed to read from FD"); + } + + len -= read_size; + buf->size += read_size; } - buff[len] = '\0'; p_close(fd); if (mtime != NULL) *mtime = st.st_mtime; + if (updated != NULL) *updated = 1; - obj->data = buff; - obj->len = len; - - return GIT_SUCCESS; -} - -int git_futils_readbuffer(git_fbuffer *obj, const char *path) -{ - return git_futils_readbuffer_updated(obj, path, NULL, NULL); + return 0; } -void git_futils_fbuffer_rtrim(git_fbuffer *obj) +int git_futils_readbuffer(git_buf *buf, const char *path) { - unsigned char *buff = obj->data; - while (obj->len > 0 && isspace(buff[obj->len - 1])) - obj->len--; - buff[obj->len] = '\0'; + return git_futils_readbuffer_updated(buf, path, NULL, NULL); } -void git_futils_freebuffer(git_fbuffer *obj) -{ - assert(obj); - git__free(obj->data); - obj->data = NULL; -} - - int git_futils_mv_withpath(const char *from, const char *to, const mode_t dirmode) { if (git_futils_mkpath2file(to, dirmode) < GIT_SUCCESS) diff --git a/src/fileops.h b/src/fileops.h index 4c114026b..43ef21521 100644 --- a/src/fileops.h +++ b/src/fileops.h @@ -17,17 +17,8 @@ * * Read whole files into an in-memory buffer for processing */ -#define GIT_FBUFFER_INIT {NULL, 0} - -typedef struct { /* file io buffer */ - void *data; /* data bytes */ - size_t len; /* data length */ -} git_fbuffer; - -extern int 
git_futils_readbuffer(git_fbuffer *obj, const char *path); -extern int git_futils_readbuffer_updated(git_fbuffer *obj, const char *path, time_t *mtime, int *updated); -extern void git_futils_freebuffer(git_fbuffer *obj); -extern void git_futils_fbuffer_rtrim(git_fbuffer *obj); +extern int git_futils_readbuffer(git_buf *obj, const char *path); +extern int git_futils_readbuffer_updated(git_buf *obj, const char *path, time_t *mtime, int *updated); /** * File utils diff --git a/src/ignore.c b/src/ignore.c index 30f86b822..a3bf0a282 100644 --- a/src/ignore.c +++ b/src/ignore.c @@ -11,7 +11,7 @@ static int load_ignore_file( git_repository *repo, const char *path, git_attr_file *ignores) { int error = GIT_SUCCESS; - git_fbuffer fbuf = GIT_FBUFFER_INIT; + git_buf fbuf = GIT_BUF_INIT; git_attr_fnmatch *match = NULL; const char *scan = NULL; char *context = NULL; @@ -28,7 +28,7 @@ static int load_ignore_file( if (error == GIT_SUCCESS) error = git_futils_readbuffer(&fbuf, path); - scan = fbuf.data; + scan = fbuf.ptr; while (error == GIT_SUCCESS && *scan) { if (!match && !(match = git__calloc(1, sizeof(git_attr_fnmatch)))) { @@ -53,7 +53,7 @@ static int load_ignore_file( } } - git_futils_freebuffer(&fbuf); + git_buf_free(&fbuf); git__free(match); git__free(context); diff --git a/src/index.c b/src/index.c index 4dccad527..5ac99de3e 100644 --- a/src/index.c +++ b/src/index.c @@ -216,7 +216,7 @@ void git_index_clear(git_index *index) int git_index_read(git_index *index) { int error = GIT_SUCCESS, updated; - git_fbuffer buffer = GIT_FBUFFER_INIT; + git_buf buffer = GIT_BUF_INIT; time_t mtime; assert(index->index_file_path); @@ -235,12 +235,12 @@ int git_index_read(git_index *index) if (updated) { git_index_clear(index); - error = parse_index(index, buffer.data, buffer.len); + error = parse_index(index, buffer.ptr, buffer.size); if (error == GIT_SUCCESS) index->last_modified = mtime; - git_futils_freebuffer(&buffer); + git_buf_free(&buffer); } if (error < GIT_SUCCESS) diff --git 
a/src/odb.c b/src/odb.c index 4eaf289e7..81fc82ba8 100644 --- a/src/odb.c +++ b/src/odb.c @@ -393,8 +393,8 @@ static int add_default_backends(git_odb *db, const char *objects_dir, int as_alt static int load_alternates(git_odb *odb, const char *objects_dir) { git_buf alternates_path = GIT_BUF_INIT; + git_buf alternates_buf = GIT_BUF_INIT; char *buffer; - git_fbuffer alternates_buf = GIT_FBUFFER_INIT; const char *alternate; int error; @@ -412,7 +412,7 @@ static int load_alternates(git_odb *odb, const char *objects_dir) return git__throw(GIT_EOSERR, "Failed to add backend. Can't read alternates"); } - buffer = (char *)alternates_buf.data; + buffer = (char *)alternates_buf.ptr; error = GIT_SUCCESS; /* add each alternate as a new backend; one alternate per line */ @@ -433,7 +433,8 @@ static int load_alternates(git_odb *odb, const char *objects_dir) } git_buf_free(&alternates_path); - git_futils_freebuffer(&alternates_buf); + git_buf_free(&alternates_buf); + if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to load alternates"); return error; diff --git a/src/odb_loose.c b/src/odb_loose.c index bb2b7b5f5..f5f6e35ac 100644 --- a/src/odb_loose.c +++ b/src/odb_loose.c @@ -75,13 +75,13 @@ static int object_file_name(git_buf *name, const char *dir, const git_oid *id) } -static size_t get_binary_object_header(obj_hdr *hdr, git_fbuffer *obj) +static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj) { unsigned char c; - unsigned char *data = obj->data; + unsigned char *data = (unsigned char *)obj->ptr; size_t shift, size, used = 0; - if (obj->len == 0) + if (obj->size == 0) return 0; c = data[used++]; @@ -90,7 +90,7 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_fbuffer *obj) size = c & 15; shift = 4; while (c & 0x80) { - if (obj->len <= used) + if (obj->size <= used) return 0; if (sizeof(size_t) * 8 <= shift) return 0; @@ -177,12 +177,12 @@ static void set_stream_output(z_stream *s, void *out, size_t len) } -static int start_inflate(z_stream *s, 
git_fbuffer *obj, void *out, size_t len) +static int start_inflate(z_stream *s, git_buf *obj, void *out, size_t len) { int status; init_stream(s, out, len); - set_stream_input(s, obj->data, obj->len); + set_stream_input(s, obj->ptr, obj->size); if ((status = inflateInit(s)) < Z_OK) return status; @@ -287,7 +287,7 @@ static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr) * of loose object data into packs. This format is no longer used, but * we must still read it. */ -static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_fbuffer *obj) +static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj) { unsigned char *in, *buf; obj_hdr hdr; @@ -310,8 +310,8 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_fbuffer *obj) if (!buf) return GIT_ENOMEM; - in = ((unsigned char *)obj->data) + used; - len = obj->len - used; + in = ((unsigned char *)obj->ptr) + used; + len = obj->size - used; if (inflate_buffer(in, len, buf, hdr.size)) { git__free(buf); return git__throw(GIT_ERROR, "Failed to inflate loose object. 
Could not inflate buffer"); @@ -325,7 +325,7 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_fbuffer *obj) return GIT_SUCCESS; } -static int inflate_disk_obj(git_rawobj *out, git_fbuffer *obj) +static int inflate_disk_obj(git_rawobj *out, git_buf *obj) { unsigned char head[64], *buf; z_stream zs; @@ -335,7 +335,7 @@ static int inflate_disk_obj(git_rawobj *out, git_fbuffer *obj) /* * check for a pack-like loose object */ - if (!is_zlib_compressed_data(obj->data)) + if (!is_zlib_compressed_data((unsigned char *)obj->ptr)) return inflate_packlike_loose_disk_obj(out, obj); /* @@ -383,7 +383,7 @@ static int inflate_disk_obj(git_rawobj *out, git_fbuffer *obj) static int read_loose(git_rawobj *out, git_buf *loc) { int error; - git_fbuffer obj = GIT_FBUFFER_INIT; + git_buf obj = GIT_BUF_INIT; assert(out && loc); @@ -398,7 +398,7 @@ static int read_loose(git_rawobj *out, git_buf *loc) return git__throw(GIT_ENOTFOUND, "Failed to read loose object. File not found"); error = inflate_disk_obj(out, &obj); - git_futils_freebuffer(&obj); + git_buf_free(&obj); return error == GIT_SUCCESS ? 
GIT_SUCCESS : git__rethrow(error, "Failed to read loose object"); } diff --git a/src/reflog.c b/src/reflog.c index 9f5ccd322..6ca9418cf 100644 --- a/src/reflog.c +++ b/src/reflog.c @@ -183,7 +183,7 @@ int git_reflog_read(git_reflog **reflog, git_reference *ref) { int error; git_buf log_path = GIT_BUF_INIT; - git_fbuffer log_file = GIT_FBUFFER_INIT; + git_buf log_file = GIT_BUF_INIT; git_reflog *log = NULL; *reflog = NULL; @@ -201,7 +201,7 @@ int git_reflog_read(git_reflog **reflog, git_reference *ref) goto cleanup; } - if ((error = reflog_parse(log, log_file.data, log_file.len)) < GIT_SUCCESS) + if ((error = reflog_parse(log, log_file.ptr, log_file.size)) < GIT_SUCCESS) git__rethrow(error, "Failed to read reflog"); else *reflog = log; @@ -209,7 +209,7 @@ int git_reflog_read(git_reflog **reflog, git_reference *ref) cleanup: if (error != GIT_SUCCESS && log != NULL) git_reflog_free(log); - git_futils_freebuffer(&log_file); + git_buf_free(&log_file); git_buf_free(&log_path); return error; diff --git a/src/refs.c b/src/refs.c index 8e911c1ae..2e1d92da2 100644 --- a/src/refs.c +++ b/src/refs.c @@ -32,15 +32,15 @@ struct packref { static const int default_table_size = 32; static int reference_read( - git_fbuffer *file_content, + git_buf *file_content, time_t *mtime, const char *repo_path, const char *ref_name, int *updated); /* loose refs */ -static int loose_parse_symbolic(git_reference *ref, git_fbuffer *file_content); -static int loose_parse_oid(git_oid *ref, git_fbuffer *file_content); +static int loose_parse_symbolic(git_reference *ref, git_buf *file_content); +static int loose_parse_oid(git_oid *ref, git_buf *file_content); static int loose_lookup(git_reference *ref); static int loose_lookup_to_packfile(struct packref **ref_out, git_repository *repo, const char *name); @@ -113,7 +113,7 @@ static int reference_alloc( return GIT_SUCCESS; } -static int reference_read(git_fbuffer *file_content, time_t *mtime, const char *repo_path, const char *ref_name, int *updated) 
+static int reference_read(git_buf *file_content, time_t *mtime, const char *repo_path, const char *ref_name, int *updated) { git_buf path = GIT_BUF_INIT; int error = GIT_SUCCESS; @@ -129,15 +129,15 @@ static int reference_read(git_fbuffer *file_content, time_t *mtime, const char * return error; } -static int loose_parse_symbolic(git_reference *ref, git_fbuffer *file_content) +static int loose_parse_symbolic(git_reference *ref, git_buf *file_content) { const unsigned int header_len = strlen(GIT_SYMREF); const char *refname_start; char *eol; - refname_start = (const char *)file_content->data; + refname_start = (const char *)file_content->ptr; - if (file_content->len < (header_len + 1)) + if (file_content->size < (header_len + 1)) return git__throw(GIT_EOBJCORRUPTED, "Failed to parse loose reference. Object too short"); @@ -165,15 +165,15 @@ static int loose_parse_symbolic(git_reference *ref, git_fbuffer *file_content) return GIT_SUCCESS; } -static int loose_parse_oid(git_oid *oid, git_fbuffer *file_content) +static int loose_parse_oid(git_oid *oid, git_buf *file_content) { int error; char *buffer; - buffer = (char *)file_content->data; + buffer = (char *)file_content->ptr; /* File format: 40 chars (OID) + newline */ - if (file_content->len < GIT_OID_HEXSZ + 1) + if (file_content->size < GIT_OID_HEXSZ + 1) return git__throw(GIT_EOBJCORRUPTED, "Failed to parse loose reference. 
Reference too short"); @@ -193,26 +193,26 @@ static int loose_parse_oid(git_oid *oid, git_fbuffer *file_content) static git_rtype loose_guess_rtype(const git_buf *full_path) { - git_fbuffer ref_file = GIT_FBUFFER_INIT; + git_buf ref_file = GIT_BUF_INIT; git_rtype type; type = GIT_REF_INVALID; if (git_futils_readbuffer(&ref_file, full_path->ptr) == GIT_SUCCESS) { - if (git__prefixcmp((const char *)(ref_file.data), GIT_SYMREF) == 0) + if (git__prefixcmp((const char *)(ref_file.ptr), GIT_SYMREF) == 0) type = GIT_REF_SYMBOLIC; else type = GIT_REF_OID; } - git_futils_freebuffer(&ref_file); + git_buf_free(&ref_file); return type; } static int loose_lookup(git_reference *ref) { int error = GIT_SUCCESS, updated; - git_fbuffer ref_file = GIT_FBUFFER_INIT; + git_buf ref_file = GIT_BUF_INIT; if (reference_read(&ref_file, &ref->mtime, ref->owner->path_repository, ref->name, &updated) < GIT_SUCCESS) @@ -228,7 +228,7 @@ static int loose_lookup(git_reference *ref) ref->flags = 0; - if (git__prefixcmp((const char *)(ref_file.data), GIT_SYMREF) == 0) { + if (git__prefixcmp((const char *)(ref_file.ptr), GIT_SYMREF) == 0) { ref->flags |= GIT_REF_SYMBOLIC; error = loose_parse_symbolic(ref, &ref_file); } else { @@ -236,7 +236,7 @@ static int loose_lookup(git_reference *ref) error = loose_parse_oid(&ref->target.oid, &ref_file); } - git_futils_freebuffer(&ref_file); + git_buf_free(&ref_file); if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to lookup loose reference"); @@ -250,7 +250,7 @@ static int loose_lookup_to_packfile( const char *name) { int error = GIT_SUCCESS; - git_fbuffer ref_file = GIT_FBUFFER_INIT; + git_buf ref_file = GIT_BUF_INIT; struct packref *ref = NULL; size_t name_len; @@ -273,11 +273,11 @@ static int loose_lookup_to_packfile( ref->flags = GIT_PACKREF_WAS_LOOSE; *ref_out = ref; - git_futils_freebuffer(&ref_file); + git_buf_free(&ref_file); return GIT_SUCCESS; cleanup: - git_futils_freebuffer(&ref_file); + git_buf_free(&ref_file); free(ref); return 
git__rethrow(error, "Failed to lookup loose reference"); } @@ -427,7 +427,7 @@ cleanup: static int packed_load(git_repository *repo) { int error = GIT_SUCCESS, updated; - git_fbuffer packfile = GIT_FBUFFER_INIT; + git_buf packfile = GIT_BUF_INIT; const char *buffer_start, *buffer_end; git_refcache *ref_cache = &repo->references; @@ -468,8 +468,8 @@ static int packed_load(git_repository *repo) git_hashtable_clear(ref_cache->packfile); - buffer_start = (const char *)packfile.data; - buffer_end = (const char *)(buffer_start) + packfile.len; + buffer_start = (const char *)packfile.ptr; + buffer_end = (const char *)(buffer_start) + packfile.size; while (buffer_start < buffer_end && buffer_start[0] == '#') { buffer_start = strchr(buffer_start, '\n'); @@ -500,13 +500,13 @@ static int packed_load(git_repository *repo) } } - git_futils_freebuffer(&packfile); + git_buf_free(&packfile); return GIT_SUCCESS; cleanup: git_hashtable_free(ref_cache->packfile); ref_cache->packfile = NULL; - git_futils_freebuffer(&packfile); + git_buf_free(&packfile); return git__rethrow(error, "Failed to load packed references"); } diff --git a/src/repository.c b/src/repository.c index f394d06fe..c46dd9df9 100644 --- a/src/repository.c +++ b/src/repository.c @@ -467,7 +467,7 @@ static int retrieve_ceiling_directories_offset( */ static int read_gitfile(git_buf *path_out, const char *file_path, const char *base_path) { - git_fbuffer file; + git_buf file = GIT_BUF_INIT; int error; assert(path_out && file_path); @@ -476,22 +476,22 @@ static int read_gitfile(git_buf *path_out, const char *file_path, const char *ba if (error < GIT_SUCCESS) return error; - if (git__prefixcmp((char *)file.data, GIT_FILE_CONTENT_PREFIX)) { - git_futils_freebuffer(&file); + if (git__prefixcmp((char *)file.ptr, GIT_FILE_CONTENT_PREFIX)) { + git_buf_free(&file); return git__throw(GIT_ENOTFOUND, "Invalid gitfile format `%s`", file_path); } - git_futils_fbuffer_rtrim(&file); + git_buf_rtrim(&file); - if 
(strlen(GIT_FILE_CONTENT_PREFIX) == file.len) { - git_futils_freebuffer(&file); + if (strlen(GIT_FILE_CONTENT_PREFIX) == file.size) { + git_buf_free(&file); return git__throw(GIT_ENOTFOUND, "No path in git file `%s`", file_path); } error = git_path_prettify_dir(path_out, - ((char *)file.data) + strlen(GIT_FILE_CONTENT_PREFIX), base_path); + ((char *)file.ptr) + strlen(GIT_FILE_CONTENT_PREFIX), base_path); - git_futils_freebuffer(&file); + git_buf_free(&file); if (error == GIT_SUCCESS && git_path_exists(path_out->ptr) == 0) return GIT_SUCCESS; -- cgit v1.2.1 From 44b1ff4c1209c34360cc0c43761c40f5f5020886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Mon, 27 Feb 2012 04:31:05 +0100 Subject: filter: Apply filters before writing a file to the ODB Initial implementation. The relevant code is in `blob.c`: the blob write function has been split into smaller functions. - Directly write a file to the ODB in streaming mode - Directly write a symlink to the ODB in direct mode - Apply a filter, and write a file to the ODB in direct mode When trying to write a file, we first call `git_filter__load_for_file`, which populates a filters array with the required filters based on the filename. If no filters are resolved to the filename, we can write to the ODB in streaming mode straight from disk. Otherwise, we load the whole file in memory and use double-buffering to apply the filter chain. We finish by writing the file as a whole to the ODB. 
--- src/blob.c | 156 +++++++++++++++++++++++++++++++++++++++++------------------ src/buffer.c | 2 +- src/filter.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/filter.h | 40 +++++++++++++++ 4 files changed, 281 insertions(+), 49 deletions(-) create mode 100644 src/filter.c create mode 100644 src/filter.h (limited to 'src') diff --git a/src/blob.c b/src/blob.c index 4065ffa12..57a31041e 100644 --- a/src/blob.c +++ b/src/blob.c @@ -11,6 +11,7 @@ #include "common.h" #include "blob.h" +#include "filter.h" const void *git_blob_rawcontent(git_blob *blob) { @@ -65,15 +66,101 @@ int git_blob_create_frombuffer(git_oid *oid, git_repository *repo, const void *b return GIT_SUCCESS; } +static int write_file_stream(git_oid *oid, git_odb *odb, const char *path, git_off_t file_size) +{ + int fd, error; + char buffer[4096]; + git_odb_stream *stream = NULL; + + if ((error = git_odb_open_wstream(&stream, odb, file_size, GIT_OBJ_BLOB)) < GIT_SUCCESS) + return error; + + if ((fd = p_open(path, O_RDONLY)) < 0) { + error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", path); + goto cleanup; + } + + while (file_size > 0) { + ssize_t read_len = p_read(fd, buffer, sizeof(buffer)); + + if (read_len < 0) { + error = git__throw(GIT_EOSERR, "Failed to create blob. 
Can't read full file"); + p_close(fd); + goto cleanup; + } + + stream->write(stream, buffer, read_len); + file_size -= read_len; + } + + p_close(fd); + error = stream->finalize_write(oid, stream); + +cleanup: + stream->free(stream); + return error; +} + +static int write_file_filtered( + git_oid *oid, + git_odb *odb, + const char *path, + git_vector *filters) +{ + int error; + git_buf file_in = GIT_BUF_INIT; + git_buf filter_result = GIT_BUF_INIT; + + error = git_futils_readbuffer(&file_in, path); + if (error < GIT_SUCCESS) + return error; + + error = git_filter__apply(&filter_result, &file_in, filters, path); + + if (error < GIT_SUCCESS) { + git_buf_free(&file_in); + git_buf_free(&filter_result); + return error; + } + + error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB); + + git_buf_free(&file_in); + git_buf_free(&filter_result); + + return GIT_SUCCESS; +} + +static int write_symlink(git_oid *oid, git_odb *odb, const char *path, size_t link_size) +{ + char *link_data; + ssize_t read_len; + int error; + + link_data = git__malloc(link_size); + if (!link_data) + return GIT_ENOMEM; + + read_len = p_readlink(path, link_data, link_size); + + if (read_len != (ssize_t)link_size) { + free(link_data); + return git__throw(GIT_EOSERR, "Failed to create blob. 
Can't read symlink"); + } + + error = git_odb_write(oid, odb, (void *)link_data, link_size, GIT_OBJ_BLOB); + free(link_data); + return error; +} + int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *path) { int error = GIT_SUCCESS; git_buf full_path = GIT_BUF_INIT; git_off_t size; - git_odb_stream *stream = NULL; struct stat st; const char *workdir; - git_odb *odb; + git_odb *odb = NULL; workdir = git_repository_workdir(repo); if (workdir == NULL) @@ -95,63 +182,36 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat if (error < GIT_SUCCESS) goto cleanup; - if ((error = git_odb_open_wstream(&stream, odb, (size_t)size, GIT_OBJ_BLOB)) < GIT_SUCCESS) - goto cleanup; - if (S_ISLNK(st.st_mode)) { - char *link_data; - ssize_t read_len; - - link_data = git__malloc((size_t)size); - if (!link_data) { - error = GIT_ENOMEM; - goto cleanup; - } - - read_len = p_readlink(full_path.ptr, link_data, (size_t)size); - - if (read_len != (ssize_t)size) { - error = git__throw(GIT_EOSERR, "Failed to create blob. Can't read symlink"); - free(link_data); - goto cleanup; - } - - stream->write(stream, link_data, (size_t)size); - free(link_data); - + error = write_symlink(oid, odb, full_path.ptr, (size_t)size); } else { - int fd; - char buffer[2048]; + git_vector write_filters = GIT_VECTOR_INIT; - if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0) { - error = git__throw(GIT_ENOTFOUND, "Failed to create blob. Could not open '%s'", full_path.ptr); + if ((error = git_filter__load_for_file( + &write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) goto cleanup; - } - - while (size > 0) { - ssize_t read_len = p_read(fd, buffer, sizeof(buffer)); - if (read_len < 0) { - error = git__throw(GIT_EOSERR, "Failed to create blob. 
Can't read full file"); - p_close(fd); - goto cleanup; - } - - stream->write(stream, buffer, read_len); - size -= read_len; + if (write_filters.length == 0) { + error = write_file_stream(oid, odb, full_path.ptr, size); + } else { + error = write_file_filtered(oid, odb, full_path.ptr, &write_filters); } - p_close(fd); + /* + * TODO: eventually support streaming filtered files, for files which are bigger + * than a given threshold. This is not a priority because applying a filter in + * streaming mode changes the final size of the blob, and without knowing its + * final size, the blob cannot be written in stream mode to the ODB. + * + * The plan is to do streaming writes to a tempfile on disk and then opening + * streaming that file to the ODB, using `write_file_stream`. + * + * CAREFULLY DESIGNED APIS YO + */ } - error = stream->finalize_write(oid, stream); - cleanup: - if (stream) - stream->free(stream); - git_buf_free(&full_path); - return error; } diff --git a/src/buffer.c b/src/buffer.c index b9f62cc30..e86246f94 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -235,7 +235,7 @@ char *git_buf_detach(git_buf *buf) { char *data = buf->ptr; - if (buf->asize <= 0) + if (buf->asize == 0 || buf->ptr == &git_buf__oom) return NULL; git_buf_init(buf, 0); diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 000000000..b97ac6697 --- /dev/null +++ b/src/filter.c @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "fileops.h" +#include "hash.h" +#include "filter.h" + +/* Fresh from Core Git. I wonder what we could use this for... 
*/ +void git_text__stat(git_text_stats *stats, git_buf *text) +{ + size_t i; + + memset(stats, 0, sizeof(*stats)); + + for (i = 0; i < text->size; i++) { + unsigned char c = text->ptr[i]; + + if (c == '\r') { + stats->cr++; + + if (i + 1 < text->size && text->ptr[i + 1] == '\n') + stats->crlf++; + + continue; + } + + if (c == '\n') { + stats->lf++; + continue; + } + + if (c == 127) + /* DEL */ + stats->nonprintable++; + + else if (c < 32) { + switch (c) { + /* BS, HT, ESC and FF */ + case '\b': case '\t': case '\033': case '\014': + stats->printable++; + break; + case 0: + stats->nul++; + /* fall through */ + default: + stats->nonprintable++; + } + } + else + stats->printable++; + } + + /* If file ends with EOF then don't count this EOF as non-printable. */ + if (text->size >= 1 && text->ptr[text->size - 1] == '\032') + stats->nonprintable--; +} + +/* + * Fresh from Core Git + */ +int git_text__is_binary(git_text_stats *stats) +{ + if (stats->nul) + return 1; + + if ((stats->printable >> 7) < stats->nonprintable) + return 1; + /* + * Other heuristics? Average line length might be relevant, + * as might LF vs CR vs CRLF counts.. + * + * NOTE! It might be normal to have a low ratio of CRLF to LF + * (somebody starts with a LF-only file and edits it with an editor + * that adds CRLF only to lines that are added..). But do we + * want to support CR-only? Probably not. + */ + return 0; +} + +int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode) +{ + /* We don't load any filters yet. 
HAHA */ + return 0; +} + +int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename) +{ + unsigned int src, dst, i; + git_buf *dbuffer[2]; + + dbuffer[0] = source; + dbuffer[1] = dest; + + src = 0; + + /* Pre-grow the destination buffer to more or less the size + * we expect it to have */ + if (git_buf_grow(dest, source->size) < 0) + return GIT_ENOMEM; + + for (i = 0; i < filters->length; ++i) { + git_filter_cb filter = git_vector_get(filters, i); + dst = (src + 1) % 2; + + git_buf_clear(dbuffer[dst]); + + /* Apply the filter, from dbuffer[src] to dbuffer[dst]; + * if the filtering is canceled by the user mid-filter, + * we skip to the next filter without changing the source + * of the double buffering (so that the text goes through + * cleanly). + */ + if (filter(dbuffer[dst], dbuffer[src], filename) == 0) { + src = (src + 1) % 2; + } + + if (git_buf_oom(dbuffer[dst])) + return GIT_ENOMEM; + } + + /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ + if (dst != 1) { + git_buf_swap(dest, source); + } + + return GIT_SUCCESS; +} diff --git a/src/filter.h b/src/filter.h new file mode 100644 index 000000000..9a8f84972 --- /dev/null +++ b/src/filter.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_filter_h__ +#define INCLUDE_filter_h__ + +#include "common.h" +#include "buffer.h" +#include "git2/odb.h" +#include "git2/repository.h" + +typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename); + +typedef enum { + GIT_FILTER_TO_WORKTREE, + GIT_FILTER_TO_ODB +} git_filter_mode; + +typedef struct { + /* NUL, CR, LF and CRLF counts */ + unsigned int nul, cr, lf, crlf; + + /* These are just approximations! 
*/ + unsigned int printable, nonprintable; +} git_text_stats; + +extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); +extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename); + +/* Gather stats for a piece of text */ +extern void git_text__stat(git_text_stats *stats, git_buf *text); + +/* Heuristics on a set of text stats to check whether it's binary + * text or not */ +extern int git_text__is_binary(git_text_stats *stats); + +#endif -- cgit v1.2.1 From eb8f90e523b344fc24358994ad63e737520b85b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Mon, 27 Feb 2012 17:22:51 +0100 Subject: buffer: Null terminate on rtrim --- src/buffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/buffer.c b/src/buffer.c index e86246f94..68cc39388 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -386,4 +386,6 @@ void git_buf_rtrim(git_buf *buf) buf->size--; } + + buf->ptr[buf->size] = '\0'; } -- cgit v1.2.1 From 450b40cab39c786bf67e7491755e7d0b3a4dc3ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Tue, 28 Feb 2012 01:13:32 +0100 Subject: filter: Load attributes for file --- src/filter.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/filter.h | 36 +++++++++++++++++++++++ src/repository.h | 5 ++++ 3 files changed, 129 insertions(+) (limited to 'src') diff --git a/src/filter.c b/src/filter.c index b97ac6697..1775c09c7 100644 --- a/src/filter.c +++ b/src/filter.c @@ -10,6 +10,8 @@ #include "hash.h" #include "filter.h" +#include "git2/attr.h" + /* Fresh from Core Git. I wonder what we could use this for... 
*/ void git_text__stat(git_text_stats *stats, git_buf *text) { @@ -130,3 +132,89 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const return GIT_SUCCESS; } + + +static int check_crlf(const char *value) +{ + if (value == git_attr__true) + return GIT_CRLF_TEXT; + + if (value == git_attr__false) + return GIT_CRLF_BINARY; + + if (value == NULL) + return GIT_CRLF_GUESS; + + if (strcmp(value, "input") == 0) + return GIT_CRLF_INPUT; + + if (strcmp(value, "auto") == 0) + return GIT_CRLF_AUTO; + + return GIT_CRLF_GUESS; +} + +static int check_eol(const char *value) +{ + if (value == NULL) + return GIT_EOL_UNSET; + + if (strcmp(value, "lf") == 0) + return GIT_EOL_LF; + + if (strcmp(value, "crlf") == 0) + return GIT_EOL_CRLF; + + return GIT_EOL_UNSET; +} + +static int check_ident(const char *value) +{ + return (value == git_attr__true); +} + +#if 0 +static int input_crlf_action(enum crlf_action text_attr, enum eol eol_attr) +{ + if (text_attr == CRLF_BINARY) + return CRLF_BINARY; + if (eol_attr == EOL_LF) + return CRLF_INPUT; + if (eol_attr == EOL_CRLF) + return CRLF_CRLF; + return text_attr; +} +#endif + +int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path) +{ +#define NUM_CONV_ATTRS 5 + + static const char *attr_names[NUM_CONV_ATTRS] = { + "crlf", "ident", "filter", "eol", "text", + }; + + const char *attr_vals[NUM_CONV_ATTRS]; + int error; + + error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals); + + if (error == GIT_ENOTFOUND) { + ca->crlf_action = GIT_CRLF_GUESS; + ca->eol_attr = GIT_EOL_UNSET; + ca->ident = 0; + return 0; + } + + if (error == GIT_SUCCESS) { + ca->crlf_action = check_crlf(attr_vals[4]); /* text */ + if (ca->crlf_action == GIT_CRLF_GUESS) + ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ + + ca->ident = check_ident(attr_vals[1]); /* ident */ + ca->eol_attr = check_eol(attr_vals[3]); /* eol */ + return 0; + } + + return error; +} diff --git a/src/filter.h 
b/src/filter.h index 9a8f84972..2ed9da00b 100644 --- a/src/filter.h +++ b/src/filter.h @@ -19,6 +19,41 @@ typedef enum { GIT_FILTER_TO_ODB } git_filter_mode; +typedef enum { + GIT_CRLF_GUESS = -1, + GIT_CRLF_BINARY = 0, + GIT_CRLF_TEXT, + GIT_CRLF_INPUT, + GIT_CRLF_CRLF, + GIT_CRLF_AUTO, + + GIT_SAFE_CRLF_FALSE = 0, + GIT_SAFE_CRLF_FAIL = 1, + GIT_SAFE_CRLF_WARN = 2, + + GIT_AUTO_CRLF_FALSE = 0, + GIT_AUTO_CRLF_TRUE = 1, + GIT_AUTO_CRLF_INPUT = -1, +} git_crlf_t; + +typedef enum { + GIT_EOL_UNSET, + GIT_EOL_CRLF, + GIT_EOL_LF, +#ifdef GIT_WIN32 + GIT_EOL_NATIVE = GIT_EOL_CRLF +#else + GIT_EOL_NATIVE = GIT_EOL_LF +#endif +} git_eol_t; + + +typedef struct { + int crlf_action; + int eol_attr; + int ident; +} git_conv_attrs; + typedef struct { /* NUL, CR, LF and CRLF counts */ unsigned int nul, cr, lf, crlf; @@ -28,6 +63,7 @@ typedef struct { } git_text_stats; extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); +extern int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path); extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename); /* Gather stats for a piece of text */ diff --git a/src/repository.h b/src/repository.h index 516fd10be..fa19d2e38 100644 --- a/src/repository.h +++ b/src/repository.h @@ -46,6 +46,11 @@ struct git_repository { unsigned is_bare:1; unsigned int lru_counter; + + struct { + int core_eol; + int auto_crlf; + } filter_options; }; /* fully free the object; internal method, do not -- cgit v1.2.1 From 27950fa3f40f45ede9aa2b108796fd2b73b33016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Wed, 29 Feb 2012 01:26:03 +0100 Subject: filter: Add write-to CRLF filter --- src/blob.c | 24 ++++---- src/crlf.c | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/filter.c | 131 ++++++++++++---------------------------- src/filter.h | 21 ++++--- 4 files changed, 254 insertions(+), 115 
deletions(-) create mode 100644 src/crlf.c (limited to 'src') diff --git a/src/blob.c b/src/blob.c index 57a31041e..245326157 100644 --- a/src/blob.c +++ b/src/blob.c @@ -104,29 +104,29 @@ cleanup: static int write_file_filtered( git_oid *oid, git_odb *odb, - const char *path, + const char *full_path, git_vector *filters) { int error; - git_buf file_in = GIT_BUF_INIT; - git_buf filter_result = GIT_BUF_INIT; + git_buf source = GIT_BUF_INIT; + git_buf dest = GIT_BUF_INIT; - error = git_futils_readbuffer(&file_in, path); + error = git_futils_readbuffer(&source, full_path); if (error < GIT_SUCCESS) return error; - error = git_filter__apply(&filter_result, &file_in, filters, path); + error = git_filter__apply(&dest, &source, filters); if (error < GIT_SUCCESS) { - git_buf_free(&file_in); - git_buf_free(&filter_result); + git_buf_free(&source); + git_buf_free(&dest); return error; } - error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB); + error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); - git_buf_free(&file_in); - git_buf_free(&filter_result); + git_buf_free(&source); + git_buf_free(&dest); return GIT_SUCCESS; } @@ -188,7 +188,7 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat git_vector write_filters = GIT_VECTOR_INIT; if ((error = git_filter__load_for_file( - &write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) + &write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) goto cleanup; if (write_filters.length == 0) { @@ -197,6 +197,8 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat error = write_file_filtered(oid, odb, full_path.ptr, &write_filters); } + git_filter__free(&write_filters); + /* * TODO: eventually support streaming filtered files, for files which are bigger * than a given threshold. 
This is not a priority because applying a filter in diff --git a/src/crlf.c b/src/crlf.c new file mode 100644 index 000000000..d8dd1c382 --- /dev/null +++ b/src/crlf.c @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "fileops.h" +#include "hash.h" +#include "filter.h" +#include "repository.h" + +#include "git2/attr.h" + +struct crlf_attrs { + int crlf_action; + int eol; +}; + +struct crlf_filter { + git_filter f; + struct crlf_attrs attrs; +}; + +static int check_crlf(const char *value) +{ + if (value == git_attr__true) + return GIT_CRLF_TEXT; + + if (value == git_attr__false) + return GIT_CRLF_BINARY; + + if (value == NULL) + return GIT_CRLF_GUESS; + + if (strcmp(value, "input") == 0) + return GIT_CRLF_INPUT; + + if (strcmp(value, "auto") == 0) + return GIT_CRLF_AUTO; + + return GIT_CRLF_GUESS; +} + +static int check_eol(const char *value) +{ + if (value == NULL) + return GIT_EOL_UNSET; + + if (strcmp(value, "lf") == 0) + return GIT_EOL_LF; + + if (strcmp(value, "crlf") == 0) + return GIT_EOL_CRLF; + + return GIT_EOL_UNSET; +} + +static int crlf_input_action(struct crlf_attrs *ca) +{ + if (ca->crlf_action == GIT_CRLF_BINARY) + return GIT_CRLF_BINARY; + + if (ca->eol == GIT_EOL_LF) + return GIT_CRLF_INPUT; + + if (ca->eol == GIT_EOL_CRLF) + return GIT_CRLF_CRLF; + + return ca->crlf_action; +} + +static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path) +{ +#define NUM_CONV_ATTRS 3 + + static const char *attr_names[NUM_CONV_ATTRS] = { + "crlf", "eol", "text", + }; + + const char *attr_vals[NUM_CONV_ATTRS]; + int error; + + error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals); + + if (error == GIT_ENOTFOUND) { + ca->crlf_action = GIT_CRLF_GUESS; + ca->eol = GIT_EOL_UNSET; + return 0; + } + + if 
(error == GIT_SUCCESS) { + ca->crlf_action = check_crlf(attr_vals[2]); /* text */ + if (ca->crlf_action == GIT_CRLF_GUESS) + ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ + + ca->eol = check_eol(attr_vals[1]); /* eol */ + return 0; + } + + return error; +} + +static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) +{ + size_t i = 0; + struct crlf_filter *filter = (struct crlf_filter *)self; + + assert(self && dest && source); + + if (filter->attrs.crlf_action == GIT_CRLF_AUTO || + filter->attrs.crlf_action == GIT_CRLF_GUESS) { + + git_text_stats stats; + git_text__stat(&stats, source); + + /* + * We're currently not going to even try to convert stuff + * that has bare CR characters. Does anybody do that crazy + * stuff? + */ + if (stats.cr != stats.crlf) + return -1; + + /* + * And add some heuristics for binary vs text, of course... + */ + if (git_text__is_binary(&stats)) + return -1; + +#if 0 + if (crlf_action == CRLF_GUESS) { + /* + * If the file in the index has any CR in it, do not convert. + * This is the new safer autocrlf handling. 
+ */ + if (has_cr_in_index(path)) + return 0; + } +#endif + + if (!stats.cr) + return -1; + } + + /* TODO: do not copy anything if there isn't a single CR */ + while (i < source->size) { + size_t org = i; + + while (i < source->size && source->ptr[i] != '\r') + i++; + + if (i > org) + git_buf_put(dest, source->ptr + org, i - org); + + i++; + + if (i >= source->size || source->ptr[i] != '\n') { + git_buf_putc(dest, '\r'); + } + } + + return 0; +} + +int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path) +{ + struct crlf_filter filter; + int error; + + filter.f.apply = &crlf_apply_to_odb; + filter.f.do_free = NULL; + + if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0) + return error; + + filter.attrs.crlf_action = crlf_input_action(&filter.attrs); + + if (filter.attrs.crlf_action == GIT_CRLF_BINARY) + return 0; + + if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) + return 0; + + *filter_out = git__malloc(sizeof(struct crlf_filter)); + if (*filter_out == NULL) + return GIT_ENOMEM; + + memcpy(*filter_out, &filter, sizeof(struct crlf_attrs)); + return 0; +} + diff --git a/src/filter.c b/src/filter.c index 1775c09c7..ed24ce202 100644 --- a/src/filter.c +++ b/src/filter.c @@ -10,10 +10,8 @@ #include "hash.h" #include "filter.h" -#include "git2/attr.h" - /* Fresh from Core Git. I wonder what we could use this for... */ -void git_text__stat(git_text_stats *stats, git_buf *text) +void git_text__stat(git_text_stats *stats, const git_buf *text) { size_t i; @@ -84,13 +82,45 @@ int git_text__is_binary(git_text_stats *stats) return 0; } -int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode) +int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode) { - /* We don't load any filters yet. 
HAHA */ + int error; + git_filter *crlf_filter; + + return 0; /* TODO: not quite ready yet */ + + if (mode == GIT_FILTER_TO_ODB) { + error = git_filter__crlf_to_odb(&crlf_filter, repo, path); + if (error < GIT_SUCCESS) + return error; + + if (crlf_filter != NULL) + git_vector_insert(filters, crlf_filter); + + } else { + return git__throw(GIT_ENOTIMPLEMENTED, + "Worktree filters are not implemented yet"); + } + return 0; } -int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename) +void git_filter__free(git_vector *filters) +{ + size_t i; + git_filter *filter; + + git_vector_foreach(filters, i, filter) { + if (filter->do_free != NULL) + filter->do_free(filter); + else + free(filter); + } + + git_vector_free(filters); +} + +int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) { unsigned int src, dst, i; git_buf *dbuffer[2]; @@ -106,7 +136,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const return GIT_ENOMEM; for (i = 0; i < filters->length; ++i) { - git_filter_cb filter = git_vector_get(filters, i); + git_filter *filter = git_vector_get(filters, i); dst = (src + 1) % 2; git_buf_clear(dbuffer[dst]); @@ -117,7 +147,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const * of the double buffering (so that the text goes through * cleanly). 
*/ - if (filter(dbuffer[dst], dbuffer[src], filename) == 0) { + if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) { src = (src + 1) % 2; } @@ -133,88 +163,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const return GIT_SUCCESS; } - -static int check_crlf(const char *value) -{ - if (value == git_attr__true) - return GIT_CRLF_TEXT; - - if (value == git_attr__false) - return GIT_CRLF_BINARY; - - if (value == NULL) - return GIT_CRLF_GUESS; - - if (strcmp(value, "input") == 0) - return GIT_CRLF_INPUT; - - if (strcmp(value, "auto") == 0) - return GIT_CRLF_AUTO; - - return GIT_CRLF_GUESS; -} - -static int check_eol(const char *value) -{ - if (value == NULL) - return GIT_EOL_UNSET; - - if (strcmp(value, "lf") == 0) - return GIT_EOL_LF; - - if (strcmp(value, "crlf") == 0) - return GIT_EOL_CRLF; - - return GIT_EOL_UNSET; -} - -static int check_ident(const char *value) -{ - return (value == git_attr__true); -} - -#if 0 -static int input_crlf_action(enum crlf_action text_attr, enum eol eol_attr) -{ - if (text_attr == CRLF_BINARY) - return CRLF_BINARY; - if (eol_attr == EOL_LF) - return CRLF_INPUT; - if (eol_attr == EOL_CRLF) - return CRLF_CRLF; - return text_attr; -} -#endif - -int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path) -{ -#define NUM_CONV_ATTRS 5 - - static const char *attr_names[NUM_CONV_ATTRS] = { - "crlf", "ident", "filter", "eol", "text", - }; - - const char *attr_vals[NUM_CONV_ATTRS]; - int error; - - error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals); - - if (error == GIT_ENOTFOUND) { - ca->crlf_action = GIT_CRLF_GUESS; - ca->eol_attr = GIT_EOL_UNSET; - ca->ident = 0; - return 0; - } - - if (error == GIT_SUCCESS) { - ca->crlf_action = check_crlf(attr_vals[4]); /* text */ - if (ca->crlf_action == GIT_CRLF_GUESS) - ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ - - ca->ident = check_ident(attr_vals[1]); /* ident */ - ca->eol_attr = 
check_eol(attr_vals[3]); /* eol */ - return 0; - } - - return error; -} diff --git a/src/filter.h b/src/filter.h index 2ed9da00b..9055fc0dc 100644 --- a/src/filter.h +++ b/src/filter.h @@ -12,7 +12,10 @@ #include "git2/odb.h" #include "git2/repository.h" -typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename); +typedef struct git_filter { + int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source); + void (*do_free)(struct git_filter *self); +} git_filter; typedef enum { GIT_FILTER_TO_WORKTREE, @@ -47,13 +50,6 @@ typedef enum { #endif } git_eol_t; - -typedef struct { - int crlf_action; - int eol_attr; - int ident; -} git_conv_attrs; - typedef struct { /* NUL, CR, LF and CRLF counts */ unsigned int nul, cr, lf, crlf; @@ -63,14 +59,17 @@ typedef struct { } git_text_stats; extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); -extern int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path); -extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename); +extern void git_filter__free(git_vector *filters); +extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters); /* Gather stats for a piece of text */ -extern void git_text__stat(git_text_stats *stats, git_buf *text); +extern void git_text__stat(git_text_stats *stats, const git_buf *text); /* Heuristics on a set of text stats to check whether it's binary * text or not */ extern int git_text__is_binary(git_text_stats *stats); +/* Available filters */ +extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path); + #endif -- cgit v1.2.1 From c5e944820ab50f6106ab4f86f37d087a74acc595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Thu, 1 Mar 2012 00:52:21 +0100 Subject: config: Refactor & add `git_config_get_mapped` Sane API for real-world usage. 
--- src/config.c | 172 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/config.c b/src/config.c index 4ff1b2e72..912224158 100644 --- a/src/config.c +++ b/src/config.c @@ -209,23 +209,37 @@ int git_config_set_string(git_config *cfg, const char *name, const char *value) return file->set(file, name, value); } -/*********** - * Getters - ***********/ +static int parse_bool(int *out, const char *value) +{ + /* A missing value means true */ + if (value == NULL) { + *out = 1; + return GIT_SUCCESS; + } -int git_config_get_int64(git_config *cfg, const char *name, int64_t *out) + if (!strcasecmp(value, "true") || + !strcasecmp(value, "yes") || + !strcasecmp(value, "on")) { + *out = 1; + return GIT_SUCCESS; + } + if (!strcasecmp(value, "false") || + !strcasecmp(value, "no") || + !strcasecmp(value, "off")) { + *out = 0; + return GIT_SUCCESS; + } + + return GIT_EINVALIDTYPE; +} + +static int parse_int64(int64_t *out, const char *value) { - const char *value, *num_end; - int ret; + const char *num_end; int64_t num; - ret = git_config_get_string(cfg, name, &value); - if (ret < GIT_SUCCESS) - return git__rethrow(ret, "Failed to retrieve value for '%s'", name); - - ret = git__strtol64(&num, value, &num_end, 0); - if (ret < GIT_SUCCESS) - return git__rethrow(ret, "Failed to convert value for '%s'", name); + if (git__strtol64(&num, value, &num_end, 0) < 0) + return GIT_EINVALIDTYPE; switch (*num_end) { case 'g': @@ -245,38 +259,112 @@ int git_config_get_int64(git_config *cfg, const char *name, int64_t *out) /* check that that there are no more characters after the * given modifier suffix */ if (num_end[1] != '\0') - return git__throw(GIT_EINVALIDTYPE, - "Failed to get value for '%s'. 
Invalid type suffix", name); + return GIT_EINVALIDTYPE; /* fallthrough */ case '\0': *out = num; - return GIT_SUCCESS; + return 0; default: - return git__throw(GIT_EINVALIDTYPE, - "Failed to get value for '%s'. Value is of invalid type", name); + return GIT_EINVALIDTYPE; } } -int git_config_get_int32(git_config *cfg, const char *name, int32_t *out) +static int parse_int32(int32_t *out, const char *value) { - int64_t tmp_long; - int32_t tmp_int; + int64_t tmp; + int32_t truncate; + + if (parse_int64(&tmp, value) < 0) + return GIT_EINVALIDTYPE; + + truncate = tmp & 0xFFFFFFFF; + if (truncate != tmp) + return GIT_EOVERFLOW; + + *out = truncate; + return 0; +} + +/*********** + * Getters + ***********/ +int git_config_get_mapped(git_config *cfg, const char *name, git_cvar_map *maps, size_t map_n, int *out) +{ + size_t i; + const char *value; + int error; + + error = git_config_get_string(cfg, name, &value); + if (error < GIT_SUCCESS) + return error; + + for (i = 0; i < map_n; ++i) { + git_cvar_map *m = maps + i; + + switch (m->cvar_type) { + case GIT_CVAR_FALSE: + case GIT_CVAR_TRUE: { + int bool_val; + + if (parse_bool(&bool_val, value) == 0 && + bool_val == (int)m->cvar_type) { + *out = m->map_value; + return 0; + } + + break; + } + + case GIT_CVAR_INT32: + if (parse_int32(out, value) == 0) + return 0; + + break; + + case GIT_CVAR_STRING: + if (strcasecmp(value, m->str_match) == 0) { + *out = m->map_value; + return 0; + } + } + } + + return git__throw(GIT_ENOTFOUND, + "Failed to map the '%s' config variable with a valid value", name); +} + +int git_config_get_int64(git_config *cfg, const char *name, int64_t *out) +{ + const char *value; int ret; - ret = git_config_get_int64(cfg, name, &tmp_long); + ret = git_config_get_string(cfg, name, &value); if (ret < GIT_SUCCESS) - return git__rethrow(ret, "Failed to convert value for '%s'", name); - - tmp_int = tmp_long & 0xFFFFFFFF; - if (tmp_int != tmp_long) - return git__throw(GIT_EOVERFLOW, "Value for '%s' is too large", 
name); + return git__rethrow(ret, "Failed to retrieve value for '%s'", name); - *out = tmp_int; + if (parse_int64(out, value) < 0) + return git__throw(GIT_EINVALIDTYPE, "Failed to parse '%s' as an integer", value); - return ret; + return GIT_SUCCESS; +} + +int git_config_get_int32(git_config *cfg, const char *name, int32_t *out) +{ + const char *value; + int error; + + error = git_config_get_string(cfg, name, &value); + if (error < GIT_SUCCESS) + return git__rethrow(error, "Failed to get value for %s", name); + + error = parse_int32(out, value); + if (error < GIT_SUCCESS) + return git__throw(GIT_EINVALIDTYPE, "Failed to parse '%s' as a 32-bit integer", value); + + return GIT_SUCCESS; } int git_config_get_bool(git_config *cfg, const char *name, int *out) @@ -288,33 +376,15 @@ int git_config_get_bool(git_config *cfg, const char *name, int *out) if (error < GIT_SUCCESS) return git__rethrow(error, "Failed to get value for %s", name); - /* A missing value means true */ - if (value == NULL) { - *out = 1; + if (parse_bool(out, value) == 0) return GIT_SUCCESS; - } - if (!strcasecmp(value, "true") || - !strcasecmp(value, "yes") || - !strcasecmp(value, "on")) { - *out = 1; - return GIT_SUCCESS; - } - if (!strcasecmp(value, "false") || - !strcasecmp(value, "no") || - !strcasecmp(value, "off")) { - *out = 0; + if (parse_int32(out, value) == 0) { + *out = !!(*out); return GIT_SUCCESS; } - /* Try to parse it as an integer */ - error = git_config_get_int32(cfg, name, out); - if (error == GIT_SUCCESS) - *out = !!(*out); - - if (error < GIT_SUCCESS) - return git__rethrow(error, "Failed to get value for %s", name); - return error; + return git__throw(GIT_EINVALIDTYPE, "Failed to parse '%s' as a boolean value", value); } int git_config_get_string(git_config *cfg, const char *name, const char **out) -- cgit v1.2.1 From c5266ebac5d9753029f8b10598862cb2b7e13b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Thu, 1 Mar 2012 01:16:25 +0100 Subject: filter: Precache 
the filter config options on load --- src/filter.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/filter.h | 7 +++++-- src/repository.h | 2 +- 3 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/filter.c b/src/filter.c index ed24ce202..03189eea3 100644 --- a/src/filter.c +++ b/src/filter.c @@ -9,6 +9,8 @@ #include "fileops.h" #include "hash.h" #include "filter.h" +#include "repository.h" +#include "git2/config.h" /* Fresh from Core Git. I wonder what we could use this for... */ void git_text__stat(git_text_stats *stats, const git_buf *text) @@ -163,3 +165,42 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) return GIT_SUCCESS; } +int git_filter__load_settings(git_repository *repo) +{ + static git_cvar_map map_eol[] = { + {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, + {GIT_CVAR_STRING, "lf", GIT_EOL_LF}, + {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF}, + {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE} + }; + + static git_cvar_map map_crlf[] = { + {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE}, + {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE}, + {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT} + }; + + git_config *config; + int error; + + repo->filter_options.eol = GIT_EOL_DEFAULT; + repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT; + + error = git_repository_config__weakptr(&config, repo); + if (error < GIT_SUCCESS) + return error; + + error = git_config_get_mapped( + config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol); + + if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) + return error; + + error = git_config_get_mapped( + config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf); + + if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) + return error; + + return 0; +} diff --git a/src/filter.h b/src/filter.h index 9055fc0dc..0cf92bd1d 100644 --- a/src/filter.h +++ b/src/filter.h @@ -37,6 +37,7 @@ typedef enum { GIT_AUTO_CRLF_FALSE = 0, GIT_AUTO_CRLF_TRUE = 1, 
GIT_AUTO_CRLF_INPUT = -1, + GIT_AUTO_CRLF_DEFAULT = GIT_AUTO_CRLF_FALSE, } git_crlf_t; typedef enum { @@ -44,10 +45,11 @@ typedef enum { GIT_EOL_CRLF, GIT_EOL_LF, #ifdef GIT_WIN32 - GIT_EOL_NATIVE = GIT_EOL_CRLF + GIT_EOL_NATIVE = GIT_EOL_CRLF, #else - GIT_EOL_NATIVE = GIT_EOL_LF + GIT_EOL_NATIVE = GIT_EOL_LF, #endif + GIT_EOL_DEFAULT = GIT_EOL_NATIVE } git_eol_t; typedef struct { @@ -58,6 +60,7 @@ typedef struct { unsigned int printable, nonprintable; } git_text_stats; +extern int git_filter__load_settings(git_repository *repo); extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); extern void git_filter__free(git_vector *filters); extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters); diff --git a/src/repository.h b/src/repository.h index fa19d2e38..48505028c 100644 --- a/src/repository.h +++ b/src/repository.h @@ -48,7 +48,7 @@ struct git_repository { unsigned int lru_counter; struct { - int core_eol; + int eol; int auto_crlf; } filter_options; }; -- cgit v1.2.1 From 788430c8e3fa90dd965b44fb31ba8b2eece2ca37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Thu, 1 Mar 2012 05:06:47 +0100 Subject: filter: Properly cache filter settings --- src/filter.c | 10 ++++++++-- src/repository.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/filter.c b/src/filter.c index 03189eea3..f517512dd 100644 --- a/src/filter.c +++ b/src/filter.c @@ -87,9 +87,11 @@ int git_text__is_binary(git_text_stats *stats) int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode) { int error; - git_filter *crlf_filter; + git_filter *crlf_filter = NULL; - return 0; /* TODO: not quite ready yet */ + error = git_filter__load_settings(repo); + if (error < GIT_SUCCESS) + return error; if (mode == GIT_FILTER_TO_ODB) { error = git_filter__crlf_to_odb(&crlf_filter, repo, path); @@ -183,6 +185,9 @@ int 
git_filter__load_settings(git_repository *repo) git_config *config; int error; + if (repo->filter_options.loaded) + return GIT_SUCCESS; + repo->filter_options.eol = GIT_EOL_DEFAULT; repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT; @@ -202,5 +207,6 @@ int git_filter__load_settings(git_repository *repo) if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) return error; + repo->filter_options.loaded = 1; return 0; } diff --git a/src/repository.h b/src/repository.h index 48505028c..83f088821 100644 --- a/src/repository.h +++ b/src/repository.h @@ -48,6 +48,7 @@ struct git_repository { unsigned int lru_counter; struct { + int loaded; int eol; int auto_crlf; } filter_options; -- cgit v1.2.1 From 47a899ffed3c71080e10e73eda092a716f1be168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Thu, 1 Mar 2012 21:19:51 +0100 Subject: filter: Beautiful refactoring Comments soothe my soul. --- src/blob.c | 34 ++++++++++-------- src/crlf.c | 116 +++++++++++++++++++++++++++++++++++++++++------------------ src/filter.c | 116 ++++++++++++++++++++++++++++++----------------------------- src/filter.h | 84 +++++++++++++++++++++++++++++++++++++------ 4 files changed, 235 insertions(+), 115 deletions(-) (limited to 'src') diff --git a/src/blob.c b/src/blob.c index 245326157..e1f4a7f6a 100644 --- a/src/blob.c +++ b/src/blob.c @@ -115,19 +115,18 @@ static int write_file_filtered( if (error < GIT_SUCCESS) return error; - error = git_filter__apply(&dest, &source, filters); + error = git_filters_apply(&dest, &source, filters); - if (error < GIT_SUCCESS) { - git_buf_free(&source); - git_buf_free(&dest); - return error; - } + /* Free the source as soon as possible. 
This can be big in memory, + * and we don't want the ODB write to choke */ + git_buf_free(&source); - error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); + if (error == GIT_SUCCESS) { + /* Write the file to disk if it was properly filtered */ + error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); + } - git_buf_free(&source); git_buf_free(&dest); - return GIT_SUCCESS; } @@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat error = write_symlink(oid, odb, full_path.ptr, (size_t)size); } else { git_vector write_filters = GIT_VECTOR_INIT; + int filter_count; - if ((error = git_filter__load_for_file( - &write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) - goto cleanup; + /* Load the filters for writing this file to the ODB */ + filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB); - if (write_filters.length == 0) { + if (filter_count < 0) { + /* Negative value means there was a critical error */ + error = filter_count; + goto cleanup; + } else if (filter_count == 0) { + /* No filters need to be applied to the document: we can stream + * directly from disk */ error = write_file_stream(oid, odb, full_path.ptr, size); } else { + /* We need to apply one or more filters */ error = write_file_filtered(oid, odb, full_path.ptr, &write_filters); } - git_filter__free(&write_filters); + git_filters_free(&write_filters); /* * TODO: eventually support streaming filtered files, for files which are bigger diff --git a/src/crlf.c b/src/crlf.c index d8dd1c382..feaa687ee 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con return error; } -static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) +static int drop_crlf(git_buf *dest, const git_buf *source) { + size_t psize = source->size - 1; size_t i = 0; + + /* Initial scan: see if we can reach the end of
the document + * without finding a single carriage return */ + while (i < psize && source->ptr[i] != '\r') + i++; + + /* Clean file? Tell the library to skip this filter */ + if (i == psize) + return -1; + + /* Main scan loop. Keep moving forward until we find a carriage + * return, and then copy the whole chunk to the destination + * buffer. + * + * Note that we only scan until `size - 1`, because we cannot drop a + * carriage return if it's the last character in the file (what a weird + * file, anyway) + */ + while (i < psize) { + size_t org = i; + + while (i < psize && source->ptr[i] != '\r') + i++; + + if (i > org) + git_buf_put(dest, source->ptr + org, i - org); + + /* We found a carriage return. Is the next character a newline? + * If it is, we just keep moving. The newline will be copied + * to the dest in the next chunk. + * + * If it's not a newline, we need to insert the carriage return + * into the dest buffer, because we don't drop lone CRs. + */ + if (source->ptr[i + 1] != '\n') { + git_buf_putc(dest, '\r'); + } + + i++; + } + + /* Copy the last character in the file */ + git_buf_putc(dest, source->ptr[psize]); + return 0; +} + +static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) +{ struct crlf_filter *filter = (struct crlf_filter *)self; assert(self && dest && source); + /* Empty file? Nothing to do */ + if (source->size == 0) + return 0; + + /* Heuristics to see if we can skip the conversion. + * Straight from Core Git. + */ if (filter->attrs.crlf_action == GIT_CRLF_AUTO || filter->attrs.crlf_action == GIT_CRLF_GUESS) { git_text_stats stats; - git_text__stat(&stats, source); + git_text_gather_stats(&stats, source); /* * We're currently not going to even try to convert stuff @@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou /* * And add some heuristics for binary vs text, of course... 
*/ - if (git_text__is_binary(&stats)) + if (git_text_is_binary(&stats)) return -1; #if 0 @@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou return -1; } - /* TODO: do not copy anything if there isn't a single CR */ - while (i < source->size) { - size_t org = i; - - while (i < source->size && source->ptr[i] != '\r') - i++; - - if (i > org) - git_buf_put(dest, source->ptr + org, i - org); - - i++; - - if (i >= source->size || source->ptr[i] != '\n') { - git_buf_putc(dest, '\r'); - } - } - - return 0; + /* Actually drop the carriage returns */ + return drop_crlf(dest, source); } -int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path) +int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path) { - struct crlf_filter filter; + struct crlf_attrs ca; + struct crlf_filter *filter; int error; - filter.f.apply = &crlf_apply_to_odb; - filter.f.do_free = NULL; - - if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0) + /* Load gitattributes for the path */ + if ((error = crlf_load_attributes(&ca, repo, path)) < 0) return error; - filter.attrs.crlf_action = crlf_input_action(&filter.attrs); + /* + * Use the core Git logic to see if we should perform CRLF for this file + * based on its attributes & the value of `core.auto_crlf` + */ + ca.crlf_action = crlf_input_action(&ca); - if (filter.attrs.crlf_action == GIT_CRLF_BINARY) + if (ca.crlf_action == GIT_CRLF_BINARY) return 0; - if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) + if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) return 0; - *filter_out = git__malloc(sizeof(struct crlf_filter)); - if (*filter_out == NULL) + /* If we're good, we create a new filter object and push it + * into the filters array */ + filter = git__malloc(sizeof(struct crlf_filter)); + if (filter == NULL) return 
GIT_ENOMEM; - memcpy(*filter_out, &filter, sizeof(struct crlf_attrs)); - return 0; + filter->f.apply = &crlf_apply_to_odb; + filter->f.do_free = NULL; + memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs)); + + return git_vector_insert(filters, filter); } diff --git a/src/filter.c b/src/filter.c index f517512dd..92b3566af 100644 --- a/src/filter.c +++ b/src/filter.c @@ -13,7 +13,7 @@ #include "git2/config.h" /* Fresh from Core Git. I wonder what we could use this for... */ -void git_text__stat(git_text_stats *stats, const git_buf *text) +void git_text_gather_stats(git_text_stats *stats, const git_buf *text) { size_t i; @@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text) /* * Fresh from Core Git */ -int git_text__is_binary(git_text_stats *stats) +int git_text_is_binary(git_text_stats *stats) { if (stats->nul) return 1; @@ -84,32 +84,74 @@ int git_text__is_binary(git_text_stats *stats) return 0; } -int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode) +static int load_repository_settings(git_repository *repo) +{ + static git_cvar_map map_eol[] = { + {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, + {GIT_CVAR_STRING, "lf", GIT_EOL_LF}, + {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF}, + {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE} + }; + + static git_cvar_map map_crlf[] = { + {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE}, + {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE}, + {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT} + }; + + git_config *config; + int error; + + if (repo->filter_options.loaded) + return GIT_SUCCESS; + + repo->filter_options.eol = GIT_EOL_DEFAULT; + repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT; + + error = git_repository_config__weakptr(&config, repo); + if (error < GIT_SUCCESS) + return error; + + error = git_config_get_mapped( + config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol); + + if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) + return error; + + 
error = git_config_get_mapped( + config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf); + + if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) + return error; + + repo->filter_options.loaded = 1; + return 0; +} + +int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) { int error; - git_filter *crlf_filter = NULL; - error = git_filter__load_settings(repo); + /* Make sure that the relevant settings from `gitconfig` have been + * cached on the repository struct to speed things up */ + error = load_repository_settings(repo); if (error < GIT_SUCCESS) return error; if (mode == GIT_FILTER_TO_ODB) { - error = git_filter__crlf_to_odb(&crlf_filter, repo, path); + /* Load the CRLF cleanup filter when writing to the ODB */ + error = git_filter_add__crlf_to_odb(filters, repo, path); if (error < GIT_SUCCESS) return error; - - if (crlf_filter != NULL) - git_vector_insert(filters, crlf_filter); - } else { return git__throw(GIT_ENOTIMPLEMENTED, "Worktree filters are not implemented yet"); } - return 0; + return (int)filters->length; } -void git_filter__free(git_vector *filters) +void git_filters_free(git_vector *filters) { size_t i; git_filter *filter; @@ -124,7 +166,7 @@ void git_filter__free(git_vector *filters) git_vector_free(filters); } -int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) +int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) { unsigned int src, dst, i; git_buf *dbuffer[2]; @@ -134,6 +176,11 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) src = 0; + if (source->size == 0) { + git_buf_clear(dest); + return GIT_SUCCESS; + } + /* Pre-grow the destination buffer to more or less the size * we expect it to have */ if (git_buf_grow(dest, source->size) < 0) @@ -167,46 +214,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) return GIT_SUCCESS; } -int git_filter__load_settings(git_repository 
*repo) -{ - static git_cvar_map map_eol[] = { - {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, - {GIT_CVAR_STRING, "lf", GIT_EOL_LF}, - {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF}, - {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE} - }; - - static git_cvar_map map_crlf[] = { - {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE}, - {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE}, - {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT} - }; - - git_config *config; - int error; - - if (repo->filter_options.loaded) - return GIT_SUCCESS; - - repo->filter_options.eol = GIT_EOL_DEFAULT; - repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT; - - error = git_repository_config__weakptr(&config, repo); - if (error < GIT_SUCCESS) - return error; - - error = git_config_get_mapped( - config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol); - - if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) - return error; - - error = git_config_get_mapped( - config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf); - - if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) - return error; - - repo->filter_options.loaded = 1; - return 0; -} diff --git a/src/filter.h b/src/filter.h index 0cf92bd1d..601be1836 100644 --- a/src/filter.h +++ b/src/filter.h @@ -60,19 +60,81 @@ typedef struct { unsigned int printable, nonprintable; } git_text_stats; -extern int git_filter__load_settings(git_repository *repo); -extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); -extern void git_filter__free(git_vector *filters); -extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters); +/* + * FILTER API + */ + +/* + * For any given path in the working directory, fill the `filters` + * array with the relevant filters that need to be applied. 
+ * + * Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the + * filters that will be used when checking out a file to the working + * directory, or `GIT_FILTER_TO_ODB` for the filters used when writing + * a file to the ODB. + * + * @param filters Vector where to store all the loaded filters + * @param repo Repository object that contains `path` + * @param path Relative path of the file to be filtered + * @param mode Filtering direction (WT->ODB or ODB->WT) + * @return the number of filters loaded for the file (0 if the file + * doesn't need filtering), or a negative error code + */ +extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode); + +/* + * Apply one or more filters to a file. + * + * The file must have been loaded as a `git_buf` object. Both the `source` + * and `dest` buffers are owned by the caller and must be freed once + * they are no longer needed. + * + * NOTE: Because of the double-buffering schema, the `source` buffer that contains + * the original file may be tampered once the filtering is complete. Regardless, + * the `dest` buffer will always contain the final result of the filtering + * + * @param dest Buffer to store the result of the filtering + * @param source Buffer containing the document to filter + * @param filters A non-empty vector of filters as supplied by `git_filters_load` + * @return GIT_SUCCESS on success, an error code otherwise + */ +extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); + +/* + * Free the `filters` array generated by `git_filters_load`. + * + * Note that this frees both the array and its contents. The array will + * be clean/reusable after this call. 
+ * + * @param filters A filters array as supplied by `git_filters_load` + */ +extern void git_filters_free(git_vector *filters); + +/* + * Available filters + */ -/* Gather stats for a piece of text */ -extern void git_text__stat(git_text_stats *stats, const git_buf *text); +/* Strip CRLF, from Worktree to ODB */ +extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path); -/* Heuristics on a set of text stats to check whether it's binary - * text or not */ -extern int git_text__is_binary(git_text_stats *stats); -/* Available filters */ -extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path); +/* + * PLAINTEXT API + */ + +/* + * Gather stats for a piece of text + * + * Fill the `stats` structure with information on the number of + * unreadable characters, carriage returns, etc, so it can be + * used in heuristics. + */ +extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text); + +/* + * Process `git_text_stats` data generated by `git_text_gather_stats` to see + * if it qualifies as a binary file + */ +extern int git_text_is_binary(git_text_stats *stats); #endif -- cgit v1.2.1 From c63793ee81ee6961b2430e88379d491fa8e91bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Fri, 2 Mar 2012 03:51:45 +0100 Subject: attr: Change the attribute check macros The point of having `GIT_ATTR_TRUE` and `GIT_ATTR_FALSE` macros is to be able to change the way that true and false values are stored inside of the returned gitattributes value pointer. However, if these macros are implemented as a simple rename for the `git_attr__true` pointer, they will always be used with the `==` operator, and hence we cannot really change the implementation to any other way that doesn't imply using special pointer values and comparing them! We need to do the same thing that core Git does, which is using a function macro.
With `GIT_ATTR_TRUE(attr)`, we can change internally the way that these values are stored to anything we want. This commit does that, and rewrites a large chunk of the attributes test suite to remove duplicated code for expected attributes, and to properly test the function macro behavior instead of comparing pointers. --- src/attr_file.c | 6 +++--- src/crlf.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/attr_file.c b/src/attr_file.c index a1b69a5bb..3783b5ef3 100644 --- a/src/attr_file.c +++ b/src/attr_file.c @@ -458,12 +458,12 @@ int git_attr_assignment__parse( } assign->name_hash = 5381; - assign->value = GIT_ATTR_TRUE; + assign->value = git_attr__true; assign->is_allocated = 0; /* look for magic name prefixes */ if (*scan == '-') { - assign->value = GIT_ATTR_FALSE; + assign->value = git_attr__false; scan++; } else if (*scan == '!') { assign->value = NULL; /* explicit unspecified state */ @@ -510,7 +510,7 @@ int git_attr_assignment__parse( } /* expand macros (if given a repo with a macro cache) */ - if (repo != NULL && assign->value == GIT_ATTR_TRUE) { + if (repo != NULL && assign->value == git_attr__true) { git_attr_rule *macro = git_hashtable_lookup(repo->attrcache.macros, assign->name); diff --git a/src/crlf.c b/src/crlf.c index feaa687ee..e74f8e89b 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -25,13 +25,13 @@ struct crlf_filter { static int check_crlf(const char *value) { - if (value == git_attr__true) + if (GIT_ATTR_TRUE(value)) return GIT_CRLF_TEXT; - if (value == git_attr__false) + if (GIT_ATTR_FALSE(value)) return GIT_CRLF_BINARY; - if (value == NULL) + if (GIT_ATTR_UNSPECIFIED(value)) return GIT_CRLF_GUESS; if (strcmp(value, "input") == 0) @@ -45,7 +45,7 @@ static int check_crlf(const char *value) static int check_eol(const char *value) { - if (value == NULL) + if (GIT_ATTR_UNSPECIFIED(value)) return GIT_EOL_UNSET; if (strcmp(value, "lf") == 0) -- cgit v1.2.1 From 
f2c25d1893cfa897b0d36005604c134a731e402d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Fri, 2 Mar 2012 20:08:00 +0100 Subject: config: Implement a proper cvar cache --- src/crlf.c | 12 +++++++++-- src/filter.c | 50 ------------------------------------------- src/filter.h | 21 ------------------ src/repository.c | 5 +++++ src/repository.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 75 insertions(+), 78 deletions(-) (limited to 'src') diff --git a/src/crlf.c b/src/crlf.c index e74f8e89b..404156d6a 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -223,8 +223,16 @@ int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const if (ca.crlf_action == GIT_CRLF_BINARY) return 0; - if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) - return 0; + if (ca.crlf_action == GIT_CRLF_GUESS) { + int auto_crlf; + + if ((error = git_repository__cvar( + &auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < GIT_SUCCESS) + return error; + + if (auto_crlf == GIT_AUTO_CRLF_FALSE) + return 0; + } /* If we're good, we create a new filter object and push it * into the filters array */ diff --git a/src/filter.c b/src/filter.c index 92b3566af..f93730acb 100644 --- a/src/filter.c +++ b/src/filter.c @@ -84,60 +84,10 @@ int git_text_is_binary(git_text_stats *stats) return 0; } -static int load_repository_settings(git_repository *repo) -{ - static git_cvar_map map_eol[] = { - {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, - {GIT_CVAR_STRING, "lf", GIT_EOL_LF}, - {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF}, - {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE} - }; - - static git_cvar_map map_crlf[] = { - {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE}, - {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE}, - {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT} - }; - - git_config *config; - int error; - - if (repo->filter_options.loaded) - return GIT_SUCCESS; - - repo->filter_options.eol = GIT_EOL_DEFAULT; - 
repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT; - - error = git_repository_config__weakptr(&config, repo); - if (error < GIT_SUCCESS) - return error; - - error = git_config_get_mapped( - config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol); - - if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) - return error; - - error = git_config_get_mapped( - config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf); - - if (error < GIT_SUCCESS && error != GIT_ENOTFOUND) - return error; - - repo->filter_options.loaded = 1; - return 0; -} - int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) { int error; - /* Make sure that the relevant settings from `gitconfig` have been - * cached on the repository struct to speed things up */ - error = load_repository_settings(repo); - if (error < GIT_SUCCESS) - return error; - if (mode == GIT_FILTER_TO_ODB) { /* Load the CRLF cleanup filter when writing to the ODB */ error = git_filter_add__crlf_to_odb(filters, repo, path); diff --git a/src/filter.h b/src/filter.h index 601be1836..5a77f25c6 100644 --- a/src/filter.h +++ b/src/filter.h @@ -29,29 +29,8 @@ typedef enum { GIT_CRLF_INPUT, GIT_CRLF_CRLF, GIT_CRLF_AUTO, - - GIT_SAFE_CRLF_FALSE = 0, - GIT_SAFE_CRLF_FAIL = 1, - GIT_SAFE_CRLF_WARN = 2, - - GIT_AUTO_CRLF_FALSE = 0, - GIT_AUTO_CRLF_TRUE = 1, - GIT_AUTO_CRLF_INPUT = -1, - GIT_AUTO_CRLF_DEFAULT = GIT_AUTO_CRLF_FALSE, } git_crlf_t; -typedef enum { - GIT_EOL_UNSET, - GIT_EOL_CRLF, - GIT_EOL_LF, -#ifdef GIT_WIN32 - GIT_EOL_NATIVE = GIT_EOL_CRLF, -#else - GIT_EOL_NATIVE = GIT_EOL_LF, -#endif - GIT_EOL_DEFAULT = GIT_EOL_NATIVE -} git_eol_t; - typedef struct { /* NUL, CR, LF and CRLF counts */ unsigned int nul, cr, lf, crlf; diff --git a/src/repository.c b/src/repository.c index c46dd9df9..1f8306991 100644 --- a/src/repository.c +++ b/src/repository.c @@ -43,6 +43,8 @@ static void drop_config(git_repository *repo) 
git_config_free(repo->_config); repo->_config = NULL; } + + git_repository__cvar_cache_clear(repo); } static void drop_index(git_repository *repo) @@ -111,6 +113,9 @@ static git_repository *repository_alloc(void) return NULL; } + /* set all the entries in the cvar cache to `unset` */ + git_repository__cvar_cache_clear(repo); + return repo; } diff --git a/src/repository.h b/src/repository.h index 83f088821..b5dcc1340 100644 --- a/src/repository.h +++ b/src/repository.h @@ -26,6 +26,49 @@ #define GIT_DIR_MODE 0755 #define GIT_BARE_DIR_MODE 0777 +/** Cvar cache identifiers */ +typedef enum { + GIT_CVAR_AUTO_CRLF = 0, /* core.autocrlf */ + GIT_CVAR_EOL, /* core.eol */ + GIT_CVAR_CACHE_MAX +} git_cvar_cached; + +/** + * CVAR value enumerations + * + * These are the values that are actually stored in the cvar cache, instead + * of their string equivalents. These values are internal and symbolic; + * make sure that none of them is set to `-1`, since that is the unique + * identifier for "not cached" + */ +typedef enum { + /* The value hasn't been loaded into the cache yet */ + GIT_CVAR_NOT_CACHED = -1, + + /* core.safecrlf: false, 'fail', 'warn' */ + GIT_SAFE_CRLF_FALSE = 0, + GIT_SAFE_CRLF_FAIL = 1, + GIT_SAFE_CRLF_WARN = 2, + + /* core.autocrlf: false, true, 'input' */ + GIT_AUTO_CRLF_FALSE = 0, + GIT_AUTO_CRLF_TRUE = 1, + GIT_AUTO_CRLF_INPUT = 2, + GIT_AUTO_CRLF_DEFAULT = GIT_AUTO_CRLF_FALSE, + + /* core.eol: unset, 'crlf', 'lf', 'native' */ + GIT_EOL_UNSET = 0, + GIT_EOL_CRLF = 1, + GIT_EOL_LF = 2, +#ifdef GIT_WIN32 + GIT_EOL_NATIVE = GIT_EOL_CRLF, +#else + GIT_EOL_NATIVE = GIT_EOL_LF, +#endif + GIT_EOL_DEFAULT = GIT_EOL_NATIVE +} git_cvar_value; + +/** Base git object for inheritance */ struct git_object { git_cached_obj cached; git_repository *repo; @@ -47,11 +90,7 @@ struct git_repository { unsigned is_bare:1; unsigned int lru_counter; - struct { - int loaded; - int eol; - int auto_crlf; - } filter_options; + git_cvar_value cvar_cache[GIT_CVAR_CACHE_MAX]; }; /*
fully free the object; internal method, do not @@ -61,8 +100,24 @@ void git_object__free(void *object); int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid); +/* + * Weak pointers to repository internals. + * + * The returned pointers do not need to be freed. Do not keep + * permanent references to these (i.e. between API calls), since they may + * become invalidated if the user replaces a repository internal. + */ int git_repository_config__weakptr(git_config **out, git_repository *repo); int git_repository_odb__weakptr(git_odb **out, git_repository *repo); int git_repository_index__weakptr(git_index **out, git_repository *repo); +/* + * CVAR cache + * + * Efficient access to the most used config variables of a repository. + * The cache is cleared every time the config backend is replaced. + */ +int git_repository__cvar(int *out, git_repository *repo, git_cvar_cached cvar); +void git_repository__cvar_cache_clear(git_repository *repo); + #endif -- cgit v1.2.1 From 97da3eaec806c542467ca2c3ec9011475c87b8d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= Date: Fri, 2 Mar 2012 21:12:00 +0100 Subject: config: Add missing file --- src/config_cache.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/config_cache.c (limited to 'src') diff --git a/src/config_cache.c b/src/config_cache.c new file mode 100644 index 000000000..5e20847f5 --- /dev/null +++ b/src/config_cache.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file.
+ */ + +#include "common.h" +#include "fileops.h" +#include "hashtable.h" +#include "config.h" +#include "git2/config.h" +#include "vector.h" +#include "filter.h" +#include "repository.h" + +struct map_data { + const char *cvar_name; + git_cvar_map *maps; + size_t map_count; + int default_value; +}; + +/* + * core.eol + * Sets the line ending type to use in the working directory for + * files that have the text property set. Alternatives are lf, crlf + * and native, which uses the platform’s native line ending. The default + * value is native. See gitattributes(5) for more information on + * end-of-line conversion. + */ +static git_cvar_map _cvar_map_eol[] = { + {GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET}, + {GIT_CVAR_STRING, "lf", GIT_EOL_LF}, + {GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF}, + {GIT_CVAR_STRING, "native", GIT_EOL_NATIVE} +}; + +/* + * core.autocrlf + * Setting this variable to "true" is almost the same as setting + * the text attribute to "auto" on all files except that text files are + * not guaranteed to be normalized: files that contain CRLF in the + * repository will not be touched. Use this setting if you want to have + * CRLF line endings in your working directory even though the repository + * does not have normalized line endings. This variable can be set to input, + * in which case no output conversion is performed. 
+ */ +static git_cvar_map _cvar_map_autocrlf[] = { + {GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE}, + {GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE}, + {GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT} +}; + +static struct map_data _cvar_maps[] = { + {"core.autocrlf", _cvar_map_autocrlf, ARRAY_SIZE(_cvar_map_autocrlf), GIT_AUTO_CRLF_DEFAULT}, + {"core.eol", _cvar_map_eol, ARRAY_SIZE(_cvar_map_eol), GIT_EOL_DEFAULT} +}; + +int git_repository__cvar(int *out, git_repository *repo, git_cvar_cached cvar) +{ + *out = repo->cvar_cache[(int)cvar]; + + if (*out == GIT_CVAR_NOT_CACHED) { + struct map_data *data = &_cvar_maps[(int)cvar]; + git_config *config; + int error; + + error = git_repository_config__weakptr(&config, repo); + if (error < GIT_SUCCESS) + return error; + + error = git_config_get_mapped( + config, data->cvar_name, data->maps, data->map_count, out); + + if (error == GIT_ENOTFOUND) + *out = data->default_value; + + else if (error < GIT_SUCCESS) + return error; + + repo->cvar_cache[(int)cvar] = *out; + } + + return GIT_SUCCESS; +} + +void git_repository__cvar_cache_clear(git_repository *repo) +{ + int i; + + for (i = 0; i < GIT_CVAR_CACHE_MAX; ++i) + repo->cvar_cache[i] = GIT_CVAR_NOT_CACHED; +} + -- cgit v1.2.1 From ce49c7a8a902bd3a74a59a356dd11886e83d2e92 Mon Sep 17 00:00:00 2001 From: Russell Belfer Date: Fri, 2 Mar 2012 15:09:40 -0800 Subject: Add filter tests and fix some bugs This adds some initial unit tests for file filtering and fixes some simple bugs in filter application. 
--- src/blob.c | 6 ++++++ src/blob.h | 1 + src/buffer.c | 7 +++++++ src/buffer.h | 2 ++ src/crlf.c | 60 ++++++++++++++++++++---------------------------------------- src/filter.c | 31 +++++++++++++++---------------- 6 files changed, 51 insertions(+), 56 deletions(-) (limited to 'src') diff --git a/src/blob.c b/src/blob.c index e1f4a7f6a..b67f8afa5 100644 --- a/src/blob.c +++ b/src/blob.c @@ -25,6 +25,12 @@ size_t git_blob_rawsize(git_blob *blob) return blob->odb_object->raw.len; } +int git_blob__getbuf(git_buf *buffer, git_blob *blob) +{ + return git_buf_set( + buffer, blob->odb_object->raw.data, blob->odb_object->raw.len); +} + void git_blob__free(git_blob *blob) { git_odb_object_free(blob->odb_object); diff --git a/src/blob.h b/src/blob.h index f810b506b..0305e9473 100644 --- a/src/blob.h +++ b/src/blob.h @@ -19,5 +19,6 @@ struct git_blob { void git_blob__free(git_blob *blob); int git_blob__parse(git_blob *blob, git_odb_object *obj); +int git_blob__getbuf(git_buf *buffer, git_blob *blob); #endif diff --git a/src/buffer.c b/src/buffer.c index 68cc39388..3098f6d68 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -389,3 +389,10 @@ void git_buf_rtrim(git_buf *buf) buf->ptr[buf->size] = '\0'; } + +int git_buf_cmp(const git_buf *a, const git_buf *b) +{ + int result = memcmp(a->ptr, b->ptr, min(a->size, b->size)); + return (result != 0) ? result : + (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; +} diff --git a/src/buffer.h b/src/buffer.h index 3e9cb1713..3cdd794af 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -118,4 +118,6 @@ GIT_INLINE(int) git_buf_rfind_next(git_buf *buf, char ch) /* Remove whitespace from the end of the buffer */ void git_buf_rtrim(git_buf *buf); +int git_buf_cmp(const git_buf *a, const git_buf *b); + #endif diff --git a/src/crlf.c b/src/crlf.c index 404156d6a..f0ec7b736 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -104,52 +104,32 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con static int drop_crlf(git_buf *dest, const git_buf *source) { - size_t psize = source->size - 1; - size_t i = 0; + const char *scan = source->ptr, *next; + const char *scan_end = source->ptr + source->size; - /* Initial scan: see if we can reach the end of the document - * without finding a single carriage return */ - while (i < psize && source->ptr[i] != '\r') - i++; - - /* Clean file? Tell the library to skip this filter */ - if (i == psize) - return -1; - - /* Main scan loop. Keep moving forward until we find a carriage - * return, and then copy the whole chunk to the destination - * buffer. - * - * Note that we only scan until `size - 1`, because we cannot drop a - * carriage return if it's the last character in the file (what a weird - * file, anyway) + /* Main scan loop. Find the next carriage return and copy the + * whole chunk up to that point to the destination buffer. */ - while (i < psize) { - size_t org = i; + while ((next = memchr(scan, '\r', scan_end - scan)) != NULL) { + /* copy input up to \r */ + if (next > scan) + git_buf_put(dest, scan, next - scan); - while (i < psize && source->ptr[i] != '\r') - i++; - - if (i > org) - git_buf_put(dest, source->ptr + org, i - org); - - /* We found a carriage return. Is the next character a newline? - * If it is, we just keep moving. The newline will be copied - * to the dest in the next chunk. 
- * - * If it's not a newline, we need to insert the carriage return - * into the dest buffer, because we don't drop lone CRs. - */ - if (source->ptr[i + 1] != '\n') { + /* Do not drop \r unless it is followed by \n */ + if (*(next + 1) != '\n') git_buf_putc(dest, '\r'); - } - - i++; + + scan = next + 1; } - /* Copy the last character in the file */ - git_buf_putc(dest, source->ptr[psize]); - return 0; + /* If there was no \r, then tell the library to skip this filter */ + if (scan == source->ptr) + return -1; + + /* Copy remaining input into dest */ + git_buf_put(dest, scan, scan_end - scan); + + return git_buf_lasterror(dest); } static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) diff --git a/src/filter.c b/src/filter.c index f93730acb..f0ee1ad39 100644 --- a/src/filter.c +++ b/src/filter.c @@ -12,7 +12,7 @@ #include "repository.h" #include "git2/config.h" -/* Fresh from Core Git. I wonder what we could use this for... */ +/* Tweaked from Core Git. I wonder what we could use this for... 
*/ void git_text_gather_stats(git_text_stats *stats, const git_buf *text) { size_t i; @@ -27,20 +27,20 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text) if (i + 1 < text->size && text->ptr[i + 1] == '\n') stats->crlf++; - - continue; } - if (c == '\n') { + else if (c == '\n') stats->lf++; - continue; - } - if (c == 127) + else if (c == 0x85) + /* Unicode CR+LF */ + stats->crlf++; + + else if (c == 127) /* DEL */ stats->nonprintable++; - else if (c < 32) { + else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) { switch (c) { /* BS, HT, ESC and FF */ case '\b': case '\t': case '\033': case '\014': @@ -53,6 +53,7 @@ void git_text_gather_stats(git_text_stats *stats, const git_buf *text) stats->nonprintable++; } } + else stats->printable++; } @@ -118,7 +119,7 @@ void git_filters_free(git_vector *filters) int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) { - unsigned int src, dst, i; + unsigned int i, src; git_buf *dbuffer[2]; dbuffer[0] = source; @@ -138,28 +139,26 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) for (i = 0; i < filters->length; ++i) { git_filter *filter = git_vector_get(filters, i); - dst = (src + 1) % 2; + unsigned int dst = 1 - src; git_buf_clear(dbuffer[dst]); - /* Apply the filter, from dbuffer[src] to dbuffer[dst]; + /* Apply the filter from dbuffer[src] to the other buffer; * if the filtering is canceled by the user mid-filter, * we skip to the next filter without changing the source * of the double buffering (so that the text goes through * cleanly). */ - if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) { - src = (src + 1) % 2; - } + if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) + src = dst; if (git_buf_oom(dbuffer[dst])) return GIT_ENOMEM; } /* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ - if (dst != 1) { + if (src != 1) git_buf_swap(dest, source); - } return GIT_SUCCESS; } -- cgit v1.2.1