diff options
author | Junio C Hamano <junkio@cox.net> | 2007-02-26 01:20:42 -0800 |
---|---|---|
committer | Junio C Hamano <junkio@cox.net> | 2007-02-26 01:20:42 -0800 |
commit | 048f48a2fdefdf71e7af19ec7111000ce2ebf52e (patch) | |
tree | ee91b56c9071972b1585fa65838da5748c0abfd9 /convert.c | |
parent | 646b3299613f0dd947557bc965660986d024322b (diff) | |
parent | c260d790c85c07a5f50235f664c36725deedfb10 (diff) | |
download | git-048f48a2fdefdf71e7af19ec7111000ce2ebf52e.tar.gz |
Merge branch 'master' into js/diff-ni
* master: (201 commits)
Documentation: link in 1.5.0.2 material to the top documentation page.
Documentation: document remote.<name>.tagopt
GIT 1.5.0.2
git-remote: support remotes with a dot in the name
Documentation: describe "-f/-t/-m" options to "git-remote add"
diff --cc: fix display of symlink conflicts during a merge.
merge-recursive: fix longstanding bug in merging symlinks
merge-index: fix longstanding bug in merging symlinks
diff --cached: give more sensible error message when HEAD is yet to be created.
Update tests to use test-chmtime
Add test-chmtime: a utility to change mtime on files
Add Release Notes to prepare for 1.5.0.2
Allow arbitrary number of arguments to git-pack-objects
rerere: do not deal with symlinks.
rerere: do not skip two conflicted paths next to each other.
Don't modify CREDITS-FILE if it hasn't changed.
diff-patch: Avoid emitting double-slashes in textual patch.
Reword git-am 3-way fallback failure message.
Limit filename for format-patch
core.legacyheaders: Use the description used in RelNotes-1.5.0
...
Diffstat (limited to 'convert.c')
-rw-r--r-- | convert.c | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/convert.c b/convert.c new file mode 100644 index 0000000000..898bfe3eb2 --- /dev/null +++ b/convert.c @@ -0,0 +1,186 @@ +#include "cache.h" +/* + * convert.c - convert a file when checking it out and checking it in. + * + * This should use the pathname to decide on whether it wants to do some + * more interesting conversions (automatic gzip/unzip, general format + * conversions etc etc), but by default it just does automatic CRLF<->LF + * translation when the "auto_crlf" option is set. + */ + +struct text_stat { + /* CR, LF and CRLF counts */ + unsigned cr, lf, crlf; + + /* These are just approximations! */ + unsigned printable, nonprintable; +}; + +static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats) +{ + unsigned long i; + + memset(stats, 0, sizeof(*stats)); + + for (i = 0; i < size; i++) { + unsigned char c = buf[i]; + if (c == '\r') { + stats->cr++; + if (i+1 < size && buf[i+1] == '\n') + stats->crlf++; + continue; + } + if (c == '\n') { + stats->lf++; + continue; + } + if (c == 127) + /* DEL */ + stats->nonprintable++; + else if (c < 32) { + switch (c) { + /* BS, HT, ESC and FF */ + case '\b': case '\t': case '\033': case '\014': + stats->printable++; + break; + default: + stats->nonprintable++; + } + } + else + stats->printable++; + } +} + +/* + * The same heuristics as diff.c::mmfile_is_binary() + */ +static int is_binary(unsigned long size, struct text_stat *stats) +{ + + if ((stats->printable >> 7) < stats->nonprintable) + return 1; + /* + * Other heuristics? Average line length might be relevant, + * as might LF vs CR vs CRLF counts.. + * + * NOTE! It might be normal to have a low ratio of CRLF to LF + * (somebody starts with a LF-only file and edits it with an editor + * that adds CRLF only to lines that are added..). But do we + * want to support CR-only? Probably not. + */ + return 0; +} + +int convert_to_git(const char *path, char **bufp, unsigned long *sizep) +{ + char *buffer, *nbuf; + unsigned long size, nsize; + struct text_stat stats; + + /* + * FIXME! Other pluggable conversions should go here, + * based on filename patterns. Right now we just do the + * stupid auto-CRLF one. + */ + if (!auto_crlf) + return 0; + + size = *sizep; + if (!size) + return 0; + buffer = *bufp; + + gather_stats(buffer, size, &stats); + + /* No CR? Nothing to convert, regardless. */ + if (!stats.cr) + return 0; + + /* + * We're currently not going to even try to convert stuff + * that has bare CR characters. Does anybody do that crazy + * stuff? + */ + if (stats.cr != stats.crlf) + return 0; + + /* + * And add some heuristics for binary vs text, of course... + */ + if (is_binary(size, &stats)) + return 0; + + /* + * Ok, allocate a new buffer, fill it in, and return true + * to let the caller know that we switched buffers on it. + */ + nsize = size - stats.crlf; + nbuf = xmalloc(nsize); + *bufp = nbuf; + *sizep = nsize; + do { + unsigned char c = *buffer++; + if (c != '\r') + *nbuf++ = c; + } while (--size); + + return 1; +} + +int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep) +{ + char *buffer, *nbuf; + unsigned long size, nsize; + struct text_stat stats; + unsigned char last; + + /* + * FIXME! Other pluggable conversions should go here, + * based on filename patterns. Right now we just do the + * stupid auto-CRLF one. + */ + if (auto_crlf <= 0) + return 0; + + size = *sizep; + if (!size) + return 0; + buffer = *bufp; + + gather_stats(buffer, size, &stats); + + /* No LF? Nothing to convert, regardless. */ + if (!stats.lf) + return 0; + + /* Was it already in CRLF format? */ + if (stats.lf == stats.crlf) + return 0; + + /* If we have any bare CR characters, we're not going to touch it */ + if (stats.cr != stats.crlf) + return 0; + + if (is_binary(size, &stats)) + return 0; + + /* + * Ok, allocate a new buffer, fill it in, and return true + * to let the caller know that we switched buffers on it. + */ + nsize = size + stats.lf - stats.crlf; + nbuf = xmalloc(nsize); + *bufp = nbuf; + *sizep = nsize; + last = 0; + do { + unsigned char c = *buffer++; + if (c == '\n' && last != '\r') + *nbuf++ = '\r'; + *nbuf++ = c; + last = c; + } while (--size); + + return 1; +} |