diff options
Diffstat (limited to 'regex/split.c')
| -rw-r--r-- | regex/split.c | 316 | 
1 files changed, 316 insertions, 0 deletions
diff --git a/regex/split.c b/regex/split.c new file mode 100644 index 0000000000..188bdb775b --- /dev/null +++ b/regex/split.c @@ -0,0 +1,316 @@ +#include <stdio.h> +#include <string.h> + +/* + - split - divide a string into fields, like awk split() + = int split(char *string, char *fields[], int nfields, char *sep); + */ +int				/* number of fields, including overflow */ +split(string, fields, nfields, sep) +char *string; +char *fields[];			/* list is not NULL-terminated */ +int nfields;			/* number of entries available in fields[] */ +char *sep;			/* "" white, "c" single char, "ab" [ab]+ */ +{ +	register char *p = string; +	register char c;			/* latest character */ +	register char sepc = sep[0]; +	register char sepc2; +	register int fn; +	register char **fp = fields; +	register char *sepp; +	register int trimtrail; + +	/* white space */ +	if (sepc == '\0') { +		while ((c = *p++) == ' ' || c == '\t') +			continue; +		p--; +		trimtrail = 1; +		sep = " \t";	/* note, code below knows this is 2 long */ +		sepc = ' '; +	} else +		trimtrail = 0; +	sepc2 = sep[1];		/* now we can safely pick this up */ + +	/* catch empties */ +	if (*p == '\0') +		return(0); + +	/* single separator */ +	if (sepc2 == '\0') { +		fn = nfields; +		for (;;) { +			*fp++ = p; +			fn--; +			if (fn == 0) +				break; +			while ((c = *p++) != sepc) +				if (c == '\0') +					return(nfields - fn); +			*(p-1) = '\0'; +		} +		/* we have overflowed the fields vector -- just count them */ +		fn = nfields; +		for (;;) { +			while ((c = *p++) != sepc) +				if (c == '\0') +					return(fn); +			fn++; +		} +		/* not reached */ +	} + +	/* two separators */ +	if (sep[2] == '\0') { +		fn = nfields; +		for (;;) { +			*fp++ = p; +			fn--; +			while ((c = *p++) != sepc && c != sepc2) +				if (c == '\0') { +					if (trimtrail && **(fp-1) == '\0') +						fn++; +					return(nfields - fn); +				} +			if (fn == 0) +				break; +			*(p-1) = '\0'; +			while ((c = *p++) == sepc || c == sepc2) +				continue; +			p--; +		} +		/* we have overflowed the fields vector -- just count them */ +		fn = nfields; +		while (c != '\0') { +			while ((c = *p++) == sepc || c == sepc2) +				continue; +			p--; +			fn++; +			while ((c = *p++) != '\0' && c != sepc && c != sepc2) +				continue; +		} +		/* might have to trim trailing white space */ +		if (trimtrail) { +			p--; +			while ((c = *--p) == sepc || c == sepc2) +				continue; +			p++; +			if (*p != '\0') { +				if (fn == nfields+1) +					*p = '\0'; +				fn--; +			} +		} +		return(fn); +	} + +	/* n separators */ +	fn = 0; +	for (;;) { +		if (fn < nfields) +			*fp++ = p; +		fn++; +		for (;;) { +			c = *p++; +			if (c == '\0') +				return(fn); +			sepp = sep; +			while ((sepc = *sepp++) != '\0' && sepc != c) +				continue; +			if (sepc != '\0')	/* it was a separator */ +				break; +		} +		if (fn < nfields) +			*(p-1) = '\0'; +		for (;;) { +			c = *p++; +			sepp = sep; +			while ((sepc = *sepp++) != '\0' && sepc != c) +				continue; +			if (sepc == '\0')	/* it wasn't a separator */ +				break; +		} +		p--; +	} + +	/* not reached */ +} + +#ifdef TEST_SPLIT + + +/* + * test program + * pgm		runs regression + * pgm sep	splits stdin lines by sep + * pgm str sep	splits str by sep + * pgm str sep n	splits str by sep n times + */ +int +main(argc, argv) +int argc; +char *argv[]; +{ +	char buf[512]; +	register int n; +#	define	MNF	10 +	char *fields[MNF]; + +	if (argc > 4) +		for (n = atoi(argv[3]); n > 0; n--) { +			(void) strcpy(buf, argv[1]); +		} +	else if (argc > 3) +		for (n = atoi(argv[3]); n > 0; n--) { +			(void) strcpy(buf, argv[1]); +			(void) split(buf, fields, MNF, argv[2]); +		} +	else if (argc > 2) +		dosplit(argv[1], argv[2]); +	else if (argc > 1) +		while (fgets(buf, sizeof(buf), stdin) != NULL) { +			buf[strlen(buf)-1] = '\0';	/* stomp newline */ +			dosplit(buf, argv[1]); +		} +	else +		regress(); + +	exit(0); +} + +dosplit(string, seps) +char *string; +char *seps; +{ +#	define	NF	5 +	char *fields[NF]; +	register int nf; + +	nf = split(string, fields, NF, seps); +	print(nf, NF, fields); +} + +print(nf, nfp, fields) +int nf; +int nfp; +char *fields[]; +{ +	register int fn; +	register int bound; + +	bound = (nf > nfp) ? nfp : nf; +	printf("%d:\t", nf); +	for (fn = 0; fn < bound; fn++) +		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); +} + +#define	RNF	5		/* some table entries know this */ +struct { +	char *str; +	char *seps; +	int nf; +	char *fi[RNF]; +} tests[] = { +	"",		" ",	0,	{ "" }, +	" ",		" ",	2,	{ "", "" }, +	"x",		" ",	1,	{ "x" }, +	"xy",		" ",	1,	{ "xy" }, +	"x y",		" ",	2,	{ "x", "y" }, +	"abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" }, +	"  a bcd",	" ",	4,	{ "", "", "a", "bcd" }, +	"a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" }, +	" a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " }, + +	"",		" _",	0,	{ "" }, +	" ",		" _",	2,	{ "", "" }, +	"x",		" _",	1,	{ "x" }, +	"x y",		" _",	2,	{ "x", "y" }, +	"ab _ cd",	" _",	2,	{ "ab", "cd" }, +	" a_b  c ",	" _",	5,	{ "", "a", "b", "c", "" }, +	"a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" }, +	" a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " }, + +	"",		" _~",	0,	{ "" }, +	" ",		" _~",	2,	{ "", "" }, +	"x",		" _~",	1,	{ "x" }, +	"x y",		" _~",	2,	{ "x", "y" }, +	"ab _~ cd",	" _~",	2,	{ "ab", "cd" }, +	" a_b  c~",	" _~",	5,	{ "", "a", "b", "c", "" }, +	"a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" }, +	"~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " }, + +	"",		" _~-",	0,	{ "" }, +	" ",		" _~-",	2,	{ "", "" }, +	"x",		" _~-",	1,	{ "x" }, +	"x y",		" _~-",	2,	{ "x", "y" }, +	"ab _~- cd",	" _~-",	2,	{ "ab", "cd" }, +	" a_b  c~",	" _~-",	5,	{ "", "a", "b", "c", "" }, +	"a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" }, +	"~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " }, + +	"",		"  ",	0,	{ "" }, +	" ",		"  ",	2,	{ "", "" }, +	"x",		"  ",	1,	{ "x" }, +	"xy",		"  ",	1,	{ "xy" }, +	"x y",		"  ",	2,	{ "x", "y" }, +	"abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" }, +	"  a bcd",	"  ",	3,	{ "", "a", "bcd" }, +	"a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" }, +	" a b c d ",	"  ",	6,	{ "", "a", "b", "c", "d " }, + +	"",		"",	0,	{ "" }, +	" ",		"",	0,	{ "" }, +	"x",		"",	1,	{ "x" }, +	"xy",		"",	1,	{ "xy" }, +	"x y",		"",	2,	{ "x", "y" }, +	"abc def  g ",	"",	3,	{ "abc", "def", "g" }, +	"\t a bcd",	"",	2,	{ "a", "bcd" }, +	"  a \tb\t c ",	"",	3,	{ "a", "b", "c" }, +	"a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" }, +	"a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" }, +	" a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " }, + +	NULL,		NULL,	0,	{ NULL }, +}; + +regress() +{ +	char buf[512]; +	register int n; +	char *fields[RNF+1]; +	register int nf; +	register int i; +	register int printit; +	register char *f; + +	for (n = 0; tests[n].str != NULL; n++) { +		(void) strcpy(buf, tests[n].str); +		fields[RNF] = NULL; +		nf = split(buf, fields, RNF, tests[n].seps); +		printit = 0; +		if (nf != tests[n].nf) { +			printf("split `%s' by `%s' gave %d fields, not %d\n", +				tests[n].str, tests[n].seps, nf, tests[n].nf); +			printit = 1; +		} else if (fields[RNF] != NULL) { +			printf("split() went beyond array end\n"); +			printit = 1; +		} else { +			for (i = 0; i < nf && i < RNF; i++) { +				f = fields[i]; +				if (f == NULL) +					f = "(NULL)"; +				if (strcmp(f, tests[n].fi[i]) != 0) { +					printf("split `%s' by `%s', field %d is `%s', not `%s'\n", +						tests[n].str, tests[n].seps, +						i, fields[i], tests[n].fi[i]); +					printit = 1; +				} +			} +		} +		if (printit) +			print(nf, RNF, fields); +	} +} +#endif  | 
