diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2018-12-03 13:35:30 -0800 |
---|---|---|
committer | Edward Thomson <ethomson@edwardthomson.com> | 2018-12-03 17:02:10 -0800 |
commit | ef8f8ec604035142916271178b009f57faf1877a (patch) | |
tree | b5836ab7f84946055777ca5c9c1aa0c207215410 | |
parent | 59b054cb7efbfa97153f3d4dc0ab22c9ded31a22 (diff) | |
download | libgit2-ef8f8ec604035142916271178b009f57faf1877a.tar.gz |
crlf: update to match git's logic
Examine the recent CRLF changes to git by Torsten Bögershausen and
include similar changes to update our CRLF logic to match.
Note: Torsten Bögershausen has previously agreed to allow his changes to
be included in libgit2.
-rw-r--r-- | src/buf_text.c | 3 |
-rw-r--r-- | src/crlf.c | 403 |
-rw-r--r-- | src/filter.h | 10 |
3 files changed, 215 insertions, 201 deletions
diff --git a/src/buf_text.c b/src/buf_text.c index 306980b5c..fa7f16b81 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -310,6 +310,7 @@ bool git_buf_text_gather_stats( } } - return (stats->nul > 0 || + /* Treat files with a bare CR as binary */ + return (stats->cr != stats->crlf || stats->nul > 0 || ((stats->printable >> 7) < stats->nonprintable)); } diff --git a/src/crlf.c b/src/crlf.c index 96cd2d807..ab2419c4c 100644 --- a/src/crlf.c +++ b/src/crlf.c @@ -18,68 +18,58 @@ #include "buf_text.h" #include "repository.h" +typedef enum { + GIT_CRLF_UNDEFINED, + GIT_CRLF_BINARY, + GIT_CRLF_TEXT, + GIT_CRLF_TEXT_INPUT, + GIT_CRLF_TEXT_CRLF, + GIT_CRLF_AUTO, + GIT_CRLF_AUTO_INPUT, + GIT_CRLF_AUTO_CRLF, +} git_crlf_t; + struct crlf_attrs { - int crlf_action; - int eol; + int attr_action; /* the .gitattributes setting */ + int crlf_action; /* the core.autocrlf setting */ + int auto_crlf; int safe_crlf; + git_cvar_value core_eol; }; struct crlf_filter { git_filter f; }; -static int check_crlf(const char *value) +static git_crlf_t check_crlf(const char *value) { if (GIT_ATTR_TRUE(value)) return GIT_CRLF_TEXT; - - if (GIT_ATTR_FALSE(value)) + else if (GIT_ATTR_FALSE(value)) return GIT_CRLF_BINARY; - - if (GIT_ATTR_UNSPECIFIED(value)) - return GIT_CRLF_GUESS; - - if (strcmp(value, "input") == 0) - return GIT_CRLF_INPUT; - - if (strcmp(value, "auto") == 0) + else if (GIT_ATTR_UNSPECIFIED(value)) + ; + else if (strcmp(value, "input") == 0) + return GIT_CRLF_TEXT_INPUT; + else if (strcmp(value, "auto") == 0) return GIT_CRLF_AUTO; - return GIT_CRLF_GUESS; + return GIT_CRLF_UNDEFINED; } -static int check_eol(const char *value) +static git_cvar_value check_eol(const char *value) { if (GIT_ATTR_UNSPECIFIED(value)) - return GIT_EOL_UNSET; - - if (strcmp(value, "lf") == 0) + ; + else if (strcmp(value, "lf") == 0) return GIT_EOL_LF; - - if (strcmp(value, "crlf") == 0) + else if (strcmp(value, "crlf") == 0) return GIT_EOL_CRLF; return GIT_EOL_UNSET; } -static int 
crlf_input_action(struct crlf_attrs *ca) -{ - if (ca->crlf_action == GIT_CRLF_BINARY) - return GIT_CRLF_BINARY; - - if (ca->eol == GIT_EOL_LF) - return GIT_CRLF_INPUT; - - if (ca->crlf_action == GIT_CRLF_AUTO) - return GIT_CRLF_AUTO; - - if (ca->eol == GIT_EOL_CRLF) - return GIT_CRLF_CRLF; - - return ca->crlf_action; -} - static int has_cr_in_index(const git_filter_source *src) { git_repository *repo = git_filter_source_repo(src); @@ -122,147 +112,168 @@ static int has_cr_in_index(const git_filter_source *src) return found_cr; } -static int crlf_apply_to_odb( - struct crlf_attrs *ca, - git_buf *to, - const git_buf *from, - const git_filter_source *src) +static int text_eol_is_crlf(struct crlf_attrs *ca) { - /* Empty file? Nothing to do */ - if (!git_buf_len(from)) + if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE) + return 1; + else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT) return 0; - /* Heuristics to see if we can skip the conversion. - * Straight from Core Git. - */ - if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) { - git_buf_text_stats stats; + if (ca->core_eol == GIT_EOL_CRLF) + return 1; + if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF) + return 1; - /* Check heuristics for binary vs text - returns true if binary */ - if (git_buf_text_gather_stats(&stats, from, false)) - return GIT_PASSTHROUGH; + return 0; +} - /* If there are no CR characters to filter out, then just pass */ - if (!stats.cr) - return GIT_PASSTHROUGH; +static git_cvar_value output_eol(struct crlf_attrs *ca) +{ + switch (ca->crlf_action) { + case GIT_CRLF_BINARY: + return GIT_EOL_UNSET; + case GIT_CRLF_TEXT_CRLF: + return GIT_EOL_CRLF; + case GIT_CRLF_TEXT_INPUT: + return GIT_EOL_LF; + case GIT_CRLF_UNDEFINED: + case GIT_CRLF_AUTO_CRLF: + return GIT_EOL_CRLF; + case GIT_CRLF_AUTO_INPUT: + return GIT_EOL_LF; + case GIT_CRLF_TEXT: + case GIT_CRLF_AUTO: + return text_eol_is_crlf(ca) ? 
GIT_EOL_CRLF : GIT_EOL_LF; + } + + /* TODO: warn when available */ + return ca->core_eol; +} + +GIT_INLINE(int) check_safecrlf( + struct crlf_attrs *ca, + const git_filter_source *src, + git_buf_text_stats *stats) +{ + const char *filename = git_filter_source_path(src); + + if (!ca->safe_crlf) + return 0; + + if (output_eol(ca) == GIT_EOL_LF) { + /* + * CRLFs would not be restored by checkout: + * check if we'd remove CRLFs + */ + if (stats->crlf) { + if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { + /* TODO: issue a warning when available */ + } else { + if (filename && *filename) + giterr_set( + GITERR_FILTER, "CRLF would be replaced by LF in '%s'", + filename); + else + giterr_set( + GITERR_FILTER, "CRLF would be replaced by LF"); - /* If safecrlf is enabled, sanity-check the result. */ - if (stats.cr != stats.crlf || stats.lf != stats.crlf) { - switch (ca->safe_crlf) { - case GIT_SAFE_CRLF_FAIL: - giterr_set( - GITERR_FILTER, "LF would be replaced by CRLF in '%s'", - git_filter_source_path(src)); return -1; - case GIT_SAFE_CRLF_WARN: - /* TODO: issue warning when warning API is available */; - break; - default: - break; } } - + } else if (output_eol(ca) == GIT_EOL_CRLF) { /* - * We're currently not going to even try to convert stuff - * that has bare CR characters. Does anybody do that crazy - * stuff? + * CRLFs would be added by checkout: + * check if we have "naked" LFs */ - if (stats.cr != stats.crlf) - return GIT_PASSTHROUGH; + if (stats->crlf != stats->lf) { + if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { + /* TODO: issue a warning when available */ + } else { + if (filename && *filename) + giterr_set( + GITERR_FILTER, "LF would be replaced by CRLF in '%s'", + filename); + else + giterr_set( + GITERR_FILTER, "LF would be replaced by CRLF"); - if (ca->crlf_action == GIT_CRLF_GUESS) { - /* - * If the file in the index has any CR in it, do not convert. - * This is the new safer autocrlf handling. 
- */ - if (has_cr_in_index(src)) - return GIT_PASSTHROUGH; + return -1; + } } - - if (!stats.cr) - return GIT_PASSTHROUGH; } - /* Actually drop the carriage returns */ - return git_buf_text_crlf_to_lf(to, from); + return 0; } -static const char *line_ending(struct crlf_attrs *ca) +static int crlf_apply_to_odb( + struct crlf_attrs *ca, + git_buf *to, + const git_buf *from, + const git_filter_source *src) { - switch (ca->crlf_action) { - case GIT_CRLF_BINARY: - case GIT_CRLF_INPUT: - return "\n"; + git_buf_text_stats stats; + bool is_binary; + int error; - case GIT_CRLF_CRLF: - return "\r\n"; + /* Binary attribute? Empty file? Nothing to do */ + if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from)) + return GIT_PASSTHROUGH; - case GIT_CRLF_GUESS: - if (ca->auto_crlf == GIT_AUTO_CRLF_FALSE) - return "\n"; - break; + is_binary = git_buf_text_gather_stats(&stats, from, false); - case GIT_CRLF_AUTO: - if (ca->eol == GIT_EOL_CRLF) - return "\r\n"; - case GIT_CRLF_TEXT: - break; + /* Heuristics to see if we can skip the conversion. + * Straight from Core Git. + */ + if (ca->crlf_action == GIT_CRLF_AUTO || + ca->crlf_action == GIT_CRLF_AUTO_INPUT || + ca->crlf_action == GIT_CRLF_AUTO_CRLF) { - default: - goto line_ending_error; + if (is_binary) + return GIT_PASSTHROUGH; + + /* + * If the file in the index has any CR in it, do not convert. + * This is the new safer autocrlf handling. + */ + if (has_cr_in_index(src)) + return GIT_PASSTHROUGH; } - if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE) - return "\r\n"; - else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT) - return "\n"; - else if (ca->eol == GIT_EOL_UNSET) - return GIT_EOL_NATIVE == GIT_EOL_CRLF ? 
"\r\n" : "\n"; - else if (ca->eol == GIT_EOL_LF) - return "\n"; - else if (ca->eol == GIT_EOL_CRLF) - return "\r\n"; - -line_ending_error: - giterr_set(GITERR_INVALID, "invalid input to line ending filter"); - return NULL; + if ((error = check_safecrlf(ca, src, &stats)) < 0) + return error; + + /* If there are no CR characters to filter out, then just pass */ + if (!stats.crlf) + return GIT_PASSTHROUGH; + + /* Actually drop the carriage returns */ + return git_buf_text_crlf_to_lf(to, from); } static int crlf_apply_to_workdir( - struct crlf_attrs *ca, git_buf *to, const git_buf *from) + struct crlf_attrs *ca, + git_buf *to, + const git_buf *from) { git_buf_text_stats stats; - const char *workdir_ending = NULL; bool is_binary; /* Empty file? Nothing to do. */ - if (git_buf_len(from) == 0) - return 0; - - /* Determine proper line ending */ - workdir_ending = line_ending(ca); - if (!workdir_ending) - return -1; - - /* only LF->CRLF conversion is supported, do nothing on LF platforms */ - if (strcmp(workdir_ending, "\r\n") != 0) + if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF) return GIT_PASSTHROUGH; - /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */ is_binary = git_buf_text_gather_stats(&stats, from, false); + /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */ if (stats.lf == 0 || stats.lf == stats.crlf) return GIT_PASSTHROUGH; if (ca->crlf_action == GIT_CRLF_AUTO || - ca->crlf_action == GIT_CRLF_GUESS) { + ca->crlf_action == GIT_CRLF_AUTO_INPUT || + ca->crlf_action == GIT_CRLF_AUTO_CRLF) { /* If we have any existing CR or CRLF line endings, do nothing */ - if (stats.cr > 0 && stats.crlf > 0) - return GIT_PASSTHROUGH; - - /* If we have bare CR characters, do nothing */ - if (stats.cr != stats.crlf) + if (stats.cr > 0) return GIT_PASSTHROUGH; /* Don't filter binary files */ @@ -273,69 +284,80 @@ static int crlf_apply_to_workdir( return git_buf_text_lf_to_crlf(to, from); } -static int crlf_check( - git_filter 
*self, - void **payload, /* points to NULL ptr on entry, may be set */ - const git_filter_source *src, - const char **attr_values) +static int convert_attrs( + struct crlf_attrs *ca, + const char **attr_values, + const git_filter_source *src) { int error; - struct crlf_attrs ca; - GIT_UNUSED(self); + memset(ca, 0, sizeof(struct crlf_attrs)); + + if ((error = git_repository__cvar(&ca->auto_crlf, + git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF)) < 0 || + (error = git_repository__cvar(&ca->safe_crlf, + git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF)) < 0 || + (error = git_repository__cvar(&ca->core_eol, + git_filter_source_repo(src), GIT_CVAR_EOL)) < 0) + return error; + + /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ + if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && + ca->safe_crlf == GIT_SAFE_CRLF_FAIL) + ca->safe_crlf = GIT_SAFE_CRLF_WARN; + + if (attr_values) { + /* load the text attribute */ + ca->crlf_action = check_crlf(attr_values[2]); /* text */ + + if (ca->crlf_action == GIT_CRLF_UNDEFINED) + ca->crlf_action = check_crlf(attr_values[0]); /* crlf */ + + if (ca->crlf_action != GIT_CRLF_BINARY) { + /* load the eol attribute */ + int eol_attr = check_eol(attr_values[1]); + + if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF) + ca->crlf_action = GIT_CRLF_AUTO_INPUT; + else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF) + ca->crlf_action = GIT_CRLF_AUTO_CRLF; + else if (eol_attr == GIT_EOL_LF) + ca->crlf_action = GIT_CRLF_TEXT_INPUT; + else if (eol_attr == GIT_EOL_CRLF) + ca->crlf_action = GIT_CRLF_TEXT_CRLF; + } - if (!attr_values) { - ca.crlf_action = GIT_CRLF_GUESS; - ca.eol = GIT_EOL_UNSET; + ca->attr_action = ca->crlf_action; } else { - ca.crlf_action = check_crlf(attr_values[2]); /* text */ - if (ca.crlf_action == GIT_CRLF_GUESS) - ca.crlf_action = check_crlf(attr_values[0]); /* clrf */ - ca.eol = check_eol(attr_values[1]); /* eol */ + ca->crlf_action = GIT_CRLF_UNDEFINED; } - ca.auto_crlf = 
GIT_AUTO_CRLF_DEFAULT; - ca.safe_crlf = GIT_SAFE_CRLF_DEFAULT; - /* - * Use the core Git logic to see if we should perform CRLF for this file - * based on its attributes & the value of `core.autocrlf` - */ - ca.crlf_action = crlf_input_action(&ca); + if (ca->crlf_action == GIT_CRLF_TEXT) + ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT; + if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE) + ca->crlf_action = GIT_CRLF_BINARY; + if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE) + ca->crlf_action = GIT_CRLF_AUTO_CRLF; + if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT) + ca->crlf_action = GIT_CRLF_AUTO_INPUT; - if (ca.crlf_action == GIT_CRLF_BINARY) - return GIT_PASSTHROUGH; - - if (ca.crlf_action == GIT_CRLF_GUESS || - ((ca.crlf_action == GIT_CRLF_AUTO || - ca.crlf_action == GIT_CRLF_TEXT) && - git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) { - - error = git_repository__cvar( - &ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF); - if (error < 0) - return error; + return 0; +} - if (ca.crlf_action == GIT_CRLF_GUESS && - ca.auto_crlf == GIT_AUTO_CRLF_FALSE) - return GIT_PASSTHROUGH; +static int crlf_check( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + const git_filter_source *src, + const char **attr_values) +{ + struct crlf_attrs ca; - if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT && - ca.eol != GIT_EOL_CRLF && - git_filter_source_mode(src) == GIT_FILTER_SMUDGE) - return GIT_PASSTHROUGH; - } + GIT_UNUSED(self); - if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) { - error = git_repository__cvar( - &ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF); - if (error < 0) - return error; + convert_attrs(&ca, attr_values, src); - /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ - if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && - ca.safe_crlf == 
GIT_SAFE_CRLF_FAIL) - ca.safe_crlf = GIT_SAFE_CRLF_WARN; - } + if (ca.crlf_action == GIT_CRLF_BINARY) + return GIT_PASSTHROUGH; *payload = git__malloc(sizeof(ca)); GITERR_CHECK_ALLOC(*payload); @@ -345,15 +367,16 @@ static int crlf_check( } static int crlf_apply( - git_filter *self, - void **payload, /* may be read and/or set */ - git_buf *to, + git_filter *self, + void **payload, /* may be read and/or set */ + git_buf *to, const git_buf *from, const git_filter_source *src) { /* initialize payload in case `check` was bypassed */ if (!*payload) { int error = crlf_check(self, payload, src, NULL); + if (error < 0) return error; } diff --git a/src/filter.h b/src/filter.h index b1c403ba9..34081fb4a 100644 --- a/src/filter.h +++ b/src/filter.h @@ -15,16 +15,6 @@ /* Amount of file to examine for NUL byte when checking binary-ness */ #define GIT_FILTER_BYTES_TO_CHECK_NUL 8000 -/* Possible CRLF values */ -typedef enum { - GIT_CRLF_GUESS = -1, - GIT_CRLF_BINARY = 0, - GIT_CRLF_TEXT, - GIT_CRLF_INPUT, - GIT_CRLF_CRLF, - GIT_CRLF_AUTO, -} git_crlf_t; - typedef struct { git_attr_session *attr_session; git_buf *temp_buf; |