Merge branch 'master' into js/diff-ni

* master: (201 commits)
  Documentation: link in 1.5.0.2 material to the top documentation page.
  Documentation: document remote.<name>.tagopt
  GIT 1.5.0.2
  git-remote: support remotes with a dot in the name
  Documentation: describe "-f/-t/-m" options to "git-remote add"
  diff --cc: fix display of symlink conflicts during a merge.
  merge-recursive: fix longstanding bug in merging symlinks
  merge-index: fix longstanding bug in merging symlinks
  diff --cached: give more sensible error message when HEAD is yet to be created.
  Update tests to use test-chmtime
  Add test-chmtime: a utility to change mtime on files
  Add Release Notes to prepare for 1.5.0.2
  Allow arbitrary number of arguments to git-pack-objects
  rerere: do not deal with symlinks.
  rerere: do not skip two conflicted paths next to each other.
  Don't modify CREDITS-FILE if it hasn't changed.
  diff-patch: Avoid emitting double-slashes in textual patch.
  Reword git-am 3-way fallback failure message.
  Limit filename for format-patch
  core.legacyheaders: Use the description used in RelNotes-1.5.0
  ...
author: Junio C Hamano <junkio@cox.net> 2007-02-26 01:20:42 -0800
committer: Junio C Hamano <junkio@cox.net> 2007-02-26 01:20:42 -0800
commit: 048f48a2fdefdf71e7af19ec7111000ce2ebf52e (patch)
tree: ee91b56c9071972b1585fa65838da5748c0abfd9 /convert.c
parent: 646b3299613f0dd947557bc965660986d024322b (diff)
parent: c260d790c85c07a5f50235f664c36725deedfb10 (diff)
download: git-048f48a2fdefdf71e7af19ec7111000ce2ebf52e.tar.gz
1 files changed, 186 insertions, 0 deletions
diff --git a/convert.c b/convert.c
new file mode 100644
index 0000000000..898bfe3eb2
--- /dev/null
+++ b/convert.c
@@ -0,0 +1,186 @@
+#include "cache.h"
+/*
+ * convert.c - convert a file when checking it out and checking it in.
+ *
+ * This should use the pathname to decide on whether it wants to do some
+ * more interesting conversions (automatic gzip/unzip, general format
+ * conversions etc etc), but by default it just does automatic CRLF<->LF
+ * translation when the "auto_crlf" option is set.
+ */
+
+struct text_stat {	/* per-buffer byte tallies, filled in by gather_stats() */
+	/* Counts of CR bytes, LF bytes, and CRs immediately followed by LF */
+	unsigned cr, lf, crlf;
+
+	/* These are just approximations! Heuristic tallies consumed by is_binary(). */
+	unsigned printable, nonprintable;
+};
+
+static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
+{	/* Tally the first "size" bytes of buf into *stats. */
+	unsigned long i;
+
+	memset(stats, 0, sizeof(*stats));	/* every counter starts from zero */
+
+	for (i = 0; i < size; i++) {
+		unsigned char c = buf[i];
+		if (c == '\r') {
+			stats->cr++;
+			if (i+1 < size && buf[i+1] == '\n')	/* bounds check keeps the look-ahead inside buf */
+				stats->crlf++;
+			continue;	/* CR is never counted as printable or nonprintable */
+		}
+		if (c == '\n') {
+			stats->lf++;	/* counts every LF, including the LF of a CRLF pair */
+			continue;
+		}
+		if (c == 127)
+			/* DEL */
+			stats->nonprintable++;
+		else if (c < 32) {
+			switch (c) {
+				/* BS, HT, ESC and FF are the control bytes still treated as printable */
+			case '\b': case '\t': case '\033': case '\014':
+				stats->printable++;
+				break;
+			default:	/* every other byte below 32 */
+				stats->nonprintable++;
+			}
+		}
+		else	/* bytes 32..126 and 128..255 */
+			stats->printable++;
+	}
+}
+
+/* Return 1 if the tallies in "stats" look like binary data, 0 otherwise.
+ * The same heuristics as diff.c::mmfile_is_binary()
+ * Note: the "size" argument is not consulted by the current body. */
+static int is_binary(unsigned long size, struct text_stat *stats)
+{
+	/* printable >> 7 is printable / 128: binary once nonprintable bytes exceed that share */
+	if ((stats->printable >> 7) < stats->nonprintable)
+		return 1;
+	/*
+	 * Other heuristics? Average line length might be relevant,
+	 * as might LF vs CR vs CRLF counts..
+	 *
+	 * NOTE! It might be normal to have a low ratio of CRLF to LF
+	 * (somebody starts with a LF-only file and edits it with an editor
+	 * that adds CRLF only to lines that are added..). But do  we
+	 * want to support CR-only? Probably not.
+	 */
+	return 0;
+}
+
+int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+{	/* Strip the CR of every CRLF in *bufp; returns 1 if a new buffer was stored in *bufp, else 0. */
+	char *buffer, *nbuf;
+	unsigned long size, nsize;
+	struct text_stat stats;
+
+	/*
+	 * FIXME! Other pluggable conversions should go here, based on
+	 * filename patterns. Right now we just do the stupid auto-CRLF
+	 * one, and only when auto_crlf is nonzero ("path" is unused).
+	 */
+	if (!auto_crlf)
+		return 0;
+
+	size = *sizep;
+	if (!size)
+		return 0;
+	buffer = *bufp;
+
+	gather_stats(buffer, size, &stats);
+
+	/* No CR? Nothing to convert, regardless. */
+	if (!stats.cr)
+		return 0;
+
+	/*
+	 * We're currently not going to even try to convert stuff
+	 * that has bare CR characters. Does anybody do that crazy
+	 * stuff?
+	 */
+	if (stats.cr != stats.crlf)
+		return 0;
+
+	/*
+	 * And add some heuristics for binary vs text, of course...
+	 */
+	if (is_binary(size, &stats))
+		return 0;
+
+	/*
+	 * Ok, allocate a new buffer (one byte shorter per CRLF), fill it in, and return
+	 * true so the caller knows we switched buffers on it; the old one is not freed here.
+	 */
+	nsize = size - stats.crlf;
+	nbuf = xmalloc(nsize);
+	*bufp = nbuf;
+	*sizep = nsize;
+	do {	/* size is known to be nonzero here, so the body may run before the test */
+		unsigned char c = *buffer++;
+		if (c != '\r')	/* cr == crlf was checked above, so every CR precedes an LF */
+			*nbuf++ = c;
+	} while (--size);
+
+	return 1;
+}
+
+int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{	/* Expand each LF not preceded by CR in *bufp to CRLF; returns 1 if a new buffer was stored in *bufp, else 0. */
+	char *buffer, *nbuf;
+	unsigned long size, nsize;
+	struct text_stat stats;
+	unsigned char last;	/* previous byte copied; 0 before the first one */
+
+	/*
+	 * FIXME! Other pluggable conversions should go here, based on
+	 * filename patterns. Right now we just do the stupid auto-CRLF
+	 * one, and only when auto_crlf is positive ("path" is unused).
+	 */
+	if (auto_crlf <= 0)
+		return 0;
+
+	size = *sizep;
+	if (!size)
+		return 0;
+	buffer = *bufp;
+
+	gather_stats(buffer, size, &stats);
+
+	/* No LF? Nothing to convert, regardless. */
+	if (!stats.lf)
+		return 0;
+
+	/* Was it already in CRLF format? (every LF directly follows a CR) */
+	if (stats.lf == stats.crlf)
+		return 0;
+
+	/* If we have any bare CR characters, we're not going to touch it */
+	if (stats.cr != stats.crlf)
+		return 0;
+
+	if (is_binary(size, &stats))
+		return 0;
+
+	/*
+	 * Ok, allocate a new buffer (one extra byte per LF that lacks a CR), fill it in, and
+	 * return true so the caller knows we switched buffers on it; the old one is not freed here.
+	 */
+	nsize = size + stats.lf - stats.crlf;
+	nbuf = xmalloc(nsize);
+	*bufp = nbuf;
+	*sizep = nsize;
+	last = 0;
+	do {	/* size is known to be nonzero here, so the body may run before the test */
+		unsigned char c = *buffer++;
+		if (c == '\n' && last != '\r')
+			*nbuf++ = '\r';	/* bare LF: emit the missing CR first */
+		*nbuf++ = c;
+		last = c;
+	} while (--size);
+
+	return 1;
+}
author	Junio C Hamano <junkio@cox.net>	2007-02-26 01:20:42 -0800
committer	Junio C Hamano <junkio@cox.net>	2007-02-26 01:20:42 -0800
commit	048f48a2fdefdf71e7af19ec7111000ce2ebf52e (patch)
tree	ee91b56c9071972b1585fa65838da5748c0abfd9 /convert.c
parent	646b3299613f0dd947557bc965660986d024322b (diff)
parent	c260d790c85c07a5f50235f664c36725deedfb10 (diff)
download	git-048f48a2fdefdf71e7af19ec7111000ce2ebf52e.tar.gz