summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrei Zmievski <andrei@php.net> 2002-05-23 17:22:05 +0000
committer Andrei Zmievski <andrei@php.net> 2002-05-23 17:22:05 +0000
commit 001b4c71e4a5c9b8c06241e3bf8e8fea2b2a11c9 (patch)
tree fb419375e35f29d769f85047dd5f8205c57cc420
parent fc059f5e40ff81535901a3d04c889abaf8b5b485 (diff)
download php-git-001b4c71e4a5c9b8c06241e3bf8e8fea2b2a11c9.tar.gz
This code adds string offset capturing in preg_split() results. Original
patch by David Brown, modified by me.
-rw-r--r-- NEWS 1
-rw-r--r-- ext/pcre/php_pcre.c 65
2 files changed, 55 insertions, 11 deletions
diff --git a/NEWS b/NEWS
index f6b1f38f37..3f2c1eca5a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
PHP 4 NEWS
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
?? ??? 2002, Version 4.3.0
+- Added ability to capture string offsets in preg_split() results. (David Brown, Andrei)
- Fixed a crash bug in token_get_all(). (Andrei)
- Implemented glob() for Unix/Win32. (Hartmut, Edin, Markus)
- Added domxml_doc_set_root() to change the root node. (Lukas Schroeder)
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 964b3d508c..fd7b7ef1d3 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -37,6 +37,7 @@
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
+#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
#define PREG_REPLACE_EVAL (1<<0)
@@ -100,6 +101,7 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
return SUCCESS;
}
@@ -1062,6 +1064,21 @@ PHP_FUNCTION(preg_replace_callback)
}
/* }}} */
+static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+{ /* Appends one two-element (string, offset) array to the array held in result. */
+ zval *match_pair; /* the new pair; built here, then inserted into result */
+ /* Allocate the pair zval and initialise it as an array. */
+ ALLOC_ZVAL(match_pair);
+ array_init(match_pair);
+ INIT_PZVAL(match_pair); /* NOTE(review): presumably resets the zval's refcount/is_ref bookkeeping - confirm against the Zend headers */
+
+ /* Fill the pair: first the len-byte piece starting at str, then the caller-supplied offset. */
+ add_next_index_stringl(match_pair, str, len, 1); /* NOTE(review): trailing 1 is presumably the "duplicate the string" flag - confirm against the Zend API */
+ add_next_index_long(match_pair, offset);
+ /* Insert the finished pair at the next index of result; the hash stores the zval pointer (sizeof(zval *) bytes), not the zval itself. */
+ zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
+}
+
/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTION(preg_split)
@@ -1080,8 +1097,10 @@ PHP_FUNCTION(preg_split)
int limit_val = -1; /* Integer value of limit */
int no_empty = 0; /* If NO_EMPTY flag is set */
int delim_capture = 0; /* If delimiters should be captured */
+ int offset_capture = 0;/* If offsets should be captured */
int count = 0; /* Count of matched subpatterns */
int start_offset; /* Where the new search starts */
+ int next_offset; /* End of the last delimiter match + 1 */
int g_notempty = 0; /* If the match should not be empty */
char *match, /* The current match */
*last_match; /* Location of last match */
@@ -1102,6 +1121,7 @@ PHP_FUNCTION(preg_split)
convert_to_long_ex(flags);
no_empty = Z_LVAL_PP(flags) & PREG_SPLIT_NO_EMPTY;
delim_capture = Z_LVAL_PP(flags) & PREG_SPLIT_DELIM_CAPTURE;
+ offset_capture = Z_LVAL_PP(flags) & PREG_SPLIT_OFFSET_CAPTURE;
}
}
@@ -1123,6 +1143,7 @@ PHP_FUNCTION(preg_split)
/* Start at the beginning of the string */
start_offset = 0;
+ next_offset = 0;
last_match = Z_STRVAL_PP(subject);
match = NULL;
@@ -1143,9 +1164,15 @@ PHP_FUNCTION(preg_split)
match = Z_STRVAL_PP(subject) + offsets[0];
if (!no_empty || &Z_STRVAL_PP(subject)[offsets[0]] != last_match) {
- /* Add the piece to the return value */
- add_next_index_stringl(return_value, last_match,
- &Z_STRVAL_PP(subject)[offsets[0]]-last_match, 1);
+
+ if (offset_capture) {
+ /* Add (match, offset) pair to the return value */
+ add_offset_pair(return_value, last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, next_offset);
+ } else {
+ /* Add the piece to the return value */
+ add_next_index_stringl(return_value, last_match,
+ &Z_STRVAL_PP(subject)[offsets[0]]-last_match, 1);
+ }
/* One less left to do */
if (limit_val != -1)
@@ -1153,15 +1180,22 @@ PHP_FUNCTION(preg_split)
}
last_match = &Z_STRVAL_PP(subject)[offsets[1]];
+ next_offset = offsets[1];
if (delim_capture) {
int i, match_len;
for (i = 1; i < count; i++) {
match_len = offsets[(i<<1)+1] - offsets[i<<1];
- if (!no_empty || match_len > 0)
- add_next_index_stringl(return_value,
- &Z_STRVAL_PP(subject)[offsets[i<<1]],
- match_len, 1);
+ /* If we have matched a delimiter */
+ if (!no_empty || match_len > 0) {
+ if (offset_capture) {
+ add_offset_pair(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len, offsets[i<<1]);
+ } else {
+ add_next_index_stringl(return_value,
+ &Z_STRVAL_PP(subject)[offsets[i<<1]],
+ match_len, 1);
+ }
+ }
}
}
} else { /* Failed to match */
@@ -1185,11 +1219,20 @@ PHP_FUNCTION(preg_split)
/* Advance to the position right after the last full match */
start_offset = offsets[1];
}
-
+
+
if (!no_empty || start_offset != Z_STRLEN_PP(subject))
- /* Add the last piece to the return value */
- add_next_index_string(return_value,
- &Z_STRVAL_PP(subject)[start_offset], 1);
+ {
+ if (offset_capture) {
+ /* Add the last (match, offset) pair to the return value */
+ add_offset_pair(return_value, &Z_STRVAL_PP(subject)[start_offset], Z_STRLEN_PP(subject) - start_offset, start_offset);
+ } else {
+ /* Add the last piece to the return value */
+ add_next_index_string(return_value,
+ &Z_STRVAL_PP(subject)[start_offset], 1);
+ }
+ }
+
/* Clean up */
efree(offsets);