summaryrefslogtreecommitdiff
path: root/ext/standard/string.c
diff options
context:
space:
mode:
authorjim winstead <jimw@php.net>2002-01-05 20:46:43 +0000
committerjim winstead <jimw@php.net>2002-01-05 20:46:43 +0000
commitca15b22212d3ea30c931e4cb7f4e1be869ed36fc (patch)
tree625d05aa1b7484346d79a5b64070c8fed07a2313 /ext/standard/string.c
parente56fb1639b9c5da4f4d6b6b7b9c99888bfdd045b (diff)
downloadphp-git-ca15b22212d3ea30c931e4cb7f4e1be869ed36fc.tar.gz
New memcpy()-based wordwrap() implementation. The simple case
(single-character break, no forced break) appears to be about 60% faster, and there's simply no comparison for non-simple cases with non-trivial amounts of text. The old algorithm was O(n^2) (with an unfortunately large constant factor) because of the use of strncat(), the new one is O(n). Added some more tests, too. @ - Made wordwrap() significantly faster. (Jim) # test case: $t = join('',file('ChangeLog')); $w = wordwrap($t,10,"\n",1); # new code completes in less than a second. i'm still waiting for the # old code to finish.
Diffstat (limited to 'ext/standard/string.c')
-rw-r--r--ext/standard/string.c173
1 files changed, 75 insertions, 98 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 6b897346e0..3149130089 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -613,9 +613,11 @@ PHP_FUNCTION(ltrim)
Wraps buffer to selected number of characters using string break char */
PHP_FUNCTION(wordwrap)
{
- char *text, *breakchar = "\n", *newtext;
+ const char *text, *breakchar = "\n";
+ char *newtext;
int textlen, breakcharlen = 1, newtextlen;
- long linelength = 75, i = 0, l = 0, pgr = 0, last = 0;
+ long current = 0, laststart = 0, lastspace = 0;
+ long linelength = 75;
zend_bool docut = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &text, &textlen, &linelength, &breakchar, &breakcharlen, &docut) == FAILURE) {
@@ -634,121 +636,96 @@ PHP_FUNCTION(wordwrap)
additional storage space */
if (breakcharlen == 1 && !docut) {
newtext = estrndup(text, textlen);
- while (newtext[i] != '\0') {
- /* prescan line to see if it is greater than linelength */
- l = 0;
- while (newtext[i+l] != breakchar[0]) {
- if (newtext[i+l] == '\0') {
- l--;
- break;
- }
- l++;
+ laststart = lastspace = 0;
+ for (current = 0; current < textlen; current++) {
+ if (text[current] == breakchar[0]) {
+ laststart = lastspace = current;
}
-
- if (l >= linelength) {
- pgr = l;
- l = linelength;
-
- /* needs breaking; work backwards to find previous word */
- while (l >= 0) {
- if (newtext[i+l] == ' ') {
- newtext[i+l] = breakchar[0];
- break;
- }
- l--;
- }
-
- if (l == -1) {
- /* couldn't break is backwards, try looking forwards */
- l = linelength;
- while (l <= pgr) {
- if(newtext[i+l] == ' ') {
- newtext[i+l] = breakchar[0];
- break;
- }
- l++;
- }
+ else if (text[current] == ' ') {
+ if (current - laststart >= linelength) {
+ newtext[current] = breakchar[0];
+ laststart = current;
}
+ lastspace = current;
+ }
+ else if (current - laststart >= linelength
+ && laststart != lastspace) {
+ newtext[lastspace] = breakchar[0];
+ laststart = lastspace;
}
-
- i += l + 1;
}
RETURN_STRINGL(newtext, textlen, 0);
}
else {
- /* Multiple character line break */
- newtextlen = textlen * (breakcharlen + 1) + 1;
+ /* Multiple character line break or forced cut */
+ if (linelength > 0) {
+ newtextlen = textlen + (textlen/linelength) * breakcharlen + 1;
+ }
+ else {
+ newtextlen = textlen * (breakcharlen + 1) + 1;
+ }
newtext = emalloc(newtextlen);
- newtext[0] = '\0';
- i = 0;
- while (text[i] != '\0') {
-
- /* prescan line to see if it is greater than linelength */
- l = 0;
- while (text[i+l] != '\0') {
- if (text[i+l] == breakchar[0]) {
- if (breakcharlen == 1 || !strncmp(text+i+l, breakchar, breakcharlen))
- break;
- }
- l++;
+ /* now keep track of the actual new text length */
+ newtextlen = 0;
+
+ laststart = lastspace = 0;
+ for (current = 0; current < textlen; current++) {
+ /* when we hit an existing break, copy to new buffer, and
+ * fix up laststart and lastspace */
+ if (text[current] == breakchar[0]
+ && current + breakcharlen < textlen
+ && !strncmp(text+current, breakchar, breakcharlen)) {
+ memcpy(newtext+newtextlen, text+laststart, current-laststart+breakcharlen);
+ newtextlen += current-laststart+breakcharlen;
+ current += breakcharlen - 1;
+ laststart = lastspace = current + 1;
}
-
- if (l >= linelength) {
- pgr = l;
- l = linelength;
-
- /* needs breaking; work backwards to find previous word */
- while (l >= 0) {
- if (text[i+l] == ' ') {
- strncat(newtext, text+last, i+l-last);
- strncat(newtext, breakchar, breakcharlen);
- last = i + l + 1;
- break;
- }
- l--;
+ /* if it is a space, check if it is at the line boundary,
+ * copy and insert a break, or just keep track of it */
+ else if (text[current] == ' ') {
+ if (current - laststart >= linelength) {
+ memcpy(newtext+newtextlen, text+laststart, current-laststart);
+ newtextlen += current - laststart;
+ memcpy(newtext+newtextlen, breakchar, breakcharlen);
+ newtextlen += breakcharlen;
+ laststart = current + 1;
}
-
- if (l == -1) {
- /* couldn't break it backwards, try looking forwards */
- l = linelength - 1;
- while (l <= pgr) {
- if (!docut) {
- if (text[i+l] == ' ') {
- strncat(newtext, text+last, i+l-last);
- strncat(newtext, breakchar, breakcharlen);
- last = i + l + 1;
- ++l;
- break;
- }
- }
- /* cut if longer than allowed */
- else {
- if (text[i+l] == ' ' || l > i-last) {
- strncat(newtext, text+last, i+l-last+1);
- strncat(newtext, breakchar, breakcharlen);
- last = i + l + 1;
- ++l;
- break;
- }
- }
- l++;
- }
- }
- i += l + 1;
+ lastspace = current;
}
- else {
- i += (l ? l : 1);
+ /* if we are cutting, and we've accumulated enough
+ * characters, copy and insert a break. */
+ else if (current - laststart >= linelength && docut) {
+ memcpy(newtext+newtextlen, text+laststart, current-laststart);
+ newtextlen += current - laststart;
+ memcpy(newtext+newtextlen, breakchar, breakcharlen);
+ newtextlen += breakcharlen;
+ laststart = lastspace = current;
+ }
+ /* if the current word puts us over the linelength, copy
+ * back up until the last space, insert a break, and move
+ * up the laststart */
+ else if (current - laststart >= linelength
+ && laststart < lastspace) {
+ memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
+ newtextlen += lastspace - laststart;
+ memcpy(newtext+newtextlen, breakchar, breakcharlen);
+ newtextlen += breakcharlen;
+ laststart = lastspace = lastspace + 1;
}
}
- if (i + l > last) {
- strncat(newtext, text+last, i+l-last);
+ /* copy over any stragglers */
+ if (laststart != current) {
+ memcpy(newtext+newtextlen, text+laststart, current-laststart);
+ newtextlen += current - laststart;
}
- RETURN_STRINGL(newtext, strlen(newtext), 0);
+ newtext[newtextlen] = '\0';
+
+ RETURN_STRINGL(newtext, newtextlen, 0);
}
}
/* }}} */