diff options
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
-rw-r--r-- | ext/mbstring/php_mbregex.c | 983 |
1 files changed, 0 insertions, 983 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c deleted file mode 100644 index bd3069b98e..0000000000 --- a/ext/mbstring/php_mbregex.c +++ /dev/null @@ -1,983 +0,0 @@ -/* - * PHP3 Internationalization support program. - * - * Copyright (c) 1999,2000 by the PHP3 internationalization team. - * All rights reserved. - * - * This program is free software. You can use, redistribute and/or modify - * without fee under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY including implied or express warranty of - * marchantability or fitness for a particular purpose. - * - * Currently, the "PHP3 internationalization team" has no relationship with - * the "PHP Development Team". But we hope these code will be integrated - * into the PHP3, and it will be distributed as a part of PHP3. - * - * See README_i18n for more detail. - * - * Authors: - * Hironori Sato <satoh@jpnnet.com> - * Shigeru Kanemoto <sgk@happysize.co.jp> - * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> - */ - -/* - * PHP4 multibyte regular expression module - * Authors: - * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> - */ - -/* $Id$ */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "php.h" -#include "php_ini.h" -#include "mbregex.h" -#include "mbstring.h" - -#if HAVE_MBREGEX - -extern ZEND_DECLARE_MODULE_GLOBALS(mbstring) - -/* - * string buffer - */ -struct strbuf { - unsigned char* buffer; - int length; - int pos; - int allocsz; -}; - -static void -php_mbregex_strbuf_init(struct strbuf *pd) -{ - if (pd) { - pd->buffer = (char*)0; - pd->length = 0; - pd->pos = 0; - pd->allocsz = 64; - } -} - -static int -php_mbregex_strbuf_ncat(struct strbuf *pd, const unsigned char *psrc, int len) -{ - if (pd == NULL || psrc == NULL) { - return -1; - } - - if ((pd->pos + len) >= pd->length) { - /* reallocate buffer */ - int newlen = pd->length + pd->allocsz + len; - unsigned char *tmp = (unsigned char*)erealloc((void*)pd->buffer, newlen); - if (tmp == NULL) { - return -1; - } - pd->length = newlen; - pd->buffer = tmp; - } - - while (len > 0) { - pd->buffer[pd->pos++] = *psrc++; - len--; - } - - return len; -} - - -/* - * encoding name resolver - */ -int -php_mbregex_name2mbctype(const char *pname) -{ - int mbctype; - - mbctype = -1; - if (pname != NULL) { - if (strcasecmp("EUC-JP", pname) == 0) { - mbctype = MBCTYPE_EUC; - } else if (strcasecmp("UTF-8", pname) == 0) { - mbctype = MBCTYPE_UTF8; - } else if (strcasecmp("SJIS", pname) == 0) { - mbctype = MBCTYPE_SJIS; - } else if (strcasecmp("ascii", pname) == 0) { - mbctype = MBCTYPE_ASCII; - } else if (strcasecmp("euc", pname) == 0) { - mbctype = MBCTYPE_EUC; - } else if (strcasecmp("eucJP", pname) == 0) { - mbctype = MBCTYPE_EUC; - } else if (strcasecmp("EUC_JP", pname) == 0) { - mbctype = MBCTYPE_EUC; - } else if (strcasecmp("Shift_JIS", pname) == 0) { - mbctype = MBCTYPE_SJIS; - } - } - - return mbctype; -} - -static const char* -php_mbregex_mbctype2name(int mbctype) -{ - const char *p; - - if (mbctype == MBCTYPE_EUC) { - p = "EUC-JP"; - } else if(mbctype == MBCTYPE_UTF8) { - p = "UTF-8"; - } else if(mbctype == MBCTYPE_SJIS) { - p = "SJIS"; - } else if(mbctype == MBCTYPE_ASCII) { - p = "ascii"; - } else { - p = "unknown"; - } - - return p; -} - - -/* - * regex cache - */ -static int -php_mbregex_compile_pattern(mb_regex_t *pre, const char *pattern, int patlen, int options, int mbctype TSRMLS_DC) -{ - int res = 0; - const char *err_str = NULL; - mb_regex_t *rc = NULL; - - if(zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc) == FAILURE || - rc->options != options || rc->mbctype != mbctype) { - memset(pre, 0, sizeof(*pre)); - pre->fastmap = (char*)emalloc((1 << MBRE_BYTEWIDTH)*sizeof(char)); - if (pre->fastmap) { - pre->options = options; - pre->mbctype = mbctype; - err_str = mbre_compile_pattern(pattern, patlen, pre); - if (!err_str) { - zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen+1, (void *) pre, sizeof(*pre), NULL); - } else { - efree(pre->fastmap); - pre->fastmap = (char*)0; - php_error(E_WARNING, "mbregex compile err: %s", err_str); - res = 1; - } - } else { - php_error(E_WARNING, "Unable to allocate memory in mbregex_compile_pattern"); - res = 1; - } - } else { - memcpy(pre, rc, sizeof(*pre)); - } - - return res; -} - -static void -php_mbregex_init_option(const char *parg, int narg, int *option, int *eval) -{ - int n; - char c; - - if (parg) { - n = 0; - while(n < narg) { - c = parg[n++]; - if (option) { - switch (c) { - case 'i': - *option |= MBRE_OPTION_IGNORECASE; - break; - case 'x': - *option |= MBRE_OPTION_EXTENDED; - break; - case 'm': - *option |= MBRE_OPTION_MULTILINE; - break; - case 's': - *option |= MBRE_OPTION_SINGLELINE; - break; - case 'p': - *option |= MBRE_OPTION_POSIXLINE; - break; - case 'l': - *option |= MBRE_OPTION_LONGEST; - break; - default: - break; - } - } - if (eval && (c == 'e')) { - *eval = 1; - } - } - } -} - - -/* - * php funcions - */ - -/* {{{ proto string mb_regex_encoding([string encoding]) - Returns the current encoding as a string. */ -PHP_FUNCTION(mb_regex_encoding) -{ - zval **arg1; - int mbctype; - - if (ZEND_NUM_ARGS() == 0) { - RETVAL_STRING((char*)php_mbregex_mbctype2name(MBSTRG(current_mbctype)), 1); - } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) { - convert_to_string_ex(arg1); - mbctype = php_mbregex_name2mbctype(Z_STRVAL_PP(arg1)); - if (mbctype < 0) { - php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(arg1)); - RETVAL_FALSE; - } else { - MBSTRG(current_mbctype) = mbctype; - RETVAL_TRUE; - } - } else { - WRONG_PARAM_COUNT; - } -} -/* }}} */ - - -/* regex match */ -static void -php_mbereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) -{ - zval **arg_pattern, **arg_string, **array = NULL; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - int i, err, match_len, string_len, option, beg, end; - char *str; - - switch(ZEND_NUM_ARGS()) { - case 2: - if (zend_get_parameters_ex(2, &arg_pattern, &arg_string) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - - case 3: - if (zend_get_parameters_ex(3, &arg_pattern, &arg_string, &array) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - - default: - WRONG_PARAM_COUNT; - break; - } - - option = 0; - if (icase) { - option |= MBRE_OPTION_IGNORECASE; - } - - /* compile the regular expression from the supplied regex */ - if (Z_TYPE_PP(arg_pattern) == IS_STRING) { - option |= MBRE_OPTION_EXTENDED; - } else { - /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) { - convert_to_long_ex(arg_pattern); /* get rid of decimal places */ - } - convert_to_string_ex(arg_pattern); - /* don't bother doing an extended regex with just a number */ - } - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - /* actually execute the regular expression */ - convert_to_string_ex(arg_string); - err = mbre_search( - &re, - Z_STRVAL_PP(arg_string), - Z_STRLEN_PP(arg_string), - 0, Z_STRLEN_PP(arg_string), - ®s); - if (err < 0) { - mbre_free_registers(®s); - RETURN_FALSE; - } - - match_len = 1; - str = Z_STRVAL_PP(arg_string); - if (array) { - match_len = regs.end[0] - regs.beg[0]; - string_len = Z_STRLEN_PP(arg_string); - zval_dtor(*array); /* start with clean array */ - array_init(*array); - for (i = 0; i < regs.num_regs; i++) { - beg = regs.beg[i]; - end = regs.end[i]; - if (beg >= 0 && beg < end && end <= string_len) { - add_index_stringl(*array, i, &str[beg], end - beg, 1); - } else { - add_index_bool(*array, i, 0); - } - } - } - - mbre_free_registers(®s); - if (match_len == 0) { - match_len = 1; - } - RETVAL_LONG(match_len); -} - -/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) - Regular expression match for multibyte string */ -PHP_FUNCTION(mb_ereg) -{ - php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) - Case-insensitive regular expression match for multibyte string */ -PHP_FUNCTION(mb_eregi) -{ - php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - - - -/* regex replacement */ -static void -php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option) -{ - zval **arg_pattern, **arg_replace, **arg_string, **arg_option; - char *p, *string, *replace; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - struct strbuf outdev, evaldev, *pdevice; - int i, n, err, pos, replace_len, string_len, eval; - char *description = NULL; - zval retval; - - eval = 0; - switch(ZEND_NUM_ARGS()) { - case 3: - if (zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - - case 4: - if (zend_get_parameters_ex(4, &arg_pattern, &arg_replace, &arg_string, &arg_option) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_option); - option = 0; - php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, &eval); - break; - - default: - WRONG_PARAM_COUNT; - break; - } - - convert_to_string_ex(arg_pattern); - /* create regex pattern buffer */ - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - convert_to_string_ex(arg_replace); - replace = Z_STRVAL_PP(arg_replace); - replace_len = Z_STRLEN_PP(arg_replace); - - convert_to_string_ex(arg_string); - string = Z_STRVAL_PP(arg_string); - string_len = Z_STRLEN_PP(arg_string); - - /* initialize string buffer (auto reallocate buffer) */ - php_mbregex_strbuf_init(&outdev); - php_mbregex_strbuf_init(&evaldev); - outdev.allocsz = (string_len >> 2) + 8; - - if (eval) { - pdevice = &evaldev; - description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); - } else { - pdevice = &outdev; - description = NULL; - } - - /* do the actual work */ - err = 0; - pos = 0; - while (err >= 0) { - err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s); - if (err <= -2) { - php_error(E_WARNING, "mbregex search failure in php_mbereg_replace_exec()"); - break; - } - if (err >= 0) { - /* copy the part of the string before the match */ - php_mbregex_strbuf_ncat(&outdev, &string[pos], regs.beg[0] - pos); - /* copy replacement and backrefs */ - i = 0; - p = replace; - while (i < replace_len) { - n = -1; - if (p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { - n = p[1] - '0'; - } - if (n >= 0 && n < regs.num_regs) { - if (regs.beg[n] >= 0 && regs.beg[n] < regs.end[n] && regs.end[n] <= string_len) { - php_mbregex_strbuf_ncat(pdevice, &string[regs.beg[n]], regs.end[n] - regs.beg[n]); - } - p += 2; - i += 2; - } else { - php_mbregex_strbuf_ncat(pdevice, p, 1); - p++; - i++; - } - } - if (eval) { - /* null terminate buffer */ - php_mbregex_strbuf_ncat(&evaldev, "\0", 1); - /* do eval */ - zend_eval_string(evaldev.buffer, &retval, description TSRMLS_CC); - /* result of eval */ - convert_to_string(&retval); - php_mbregex_strbuf_ncat(&outdev, retval.value.str.val, retval.value.str.len); - /* Clean up */ - evaldev.pos = 0; - zval_dtor(&retval); - } - n = regs.end[0]; - if (pos < n) { - pos = n; - } else { - pos++; - } - } else { /* nomatch */ - /* stick that last bit of string on our output */ - php_mbregex_strbuf_ncat(&outdev, &string[pos], string_len - pos); - } - } - - if (description) { - efree(description); - } - mbre_free_registers(®s); - if (evaldev.buffer) { - efree((void*)evaldev.buffer); - } - n = outdev.pos; - php_mbregex_strbuf_ncat(&outdev, "\0", 1); - if (err <= -2) { - if (outdev.buffer) { - efree((void*)outdev.buffer); - } - RETVAL_FALSE; - } else { - RETVAL_STRINGL(outdev.buffer, n, 0); - } -} - -/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) - Replace regular expression for multibyte string */ -PHP_FUNCTION(mb_ereg_replace) -{ - php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED); -} -/* }}} */ - -/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) - Case insensitive replace regular expression for multibyte string */ -PHP_FUNCTION(mb_eregi_replace) -{ - php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED | MBRE_OPTION_IGNORECASE); -} -/* }}} */ - - -/* {{{ proto array mb_split(string pattern, string string [, int limit]) - split multibyte string into array by regular expression */ -PHP_FUNCTION(mb_split) -{ - zval **arg_pat, **arg_str, **arg_count = NULL; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - char *string; - int n, err, count, string_len, pos; - - count = -1; - switch (ZEND_NUM_ARGS()) { - case 2: - if (zend_get_parameters_ex(2, &arg_pat, &arg_str) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - - case 3: - if (zend_get_parameters_ex(3, &arg_pat, &arg_str, &arg_count) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_long_ex(arg_count); - count = Z_LVAL_PP(arg_count); - break; - - default: - WRONG_PARAM_COUNT; - break; - } - - if (array_init(return_value) == FAILURE) { - RETURN_FALSE; - } - - convert_to_string_ex(arg_pat); - convert_to_string_ex(arg_str); - - /* create regex pattern buffer */ - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_PP(arg_pat), - Z_STRLEN_PP(arg_pat), - MBRE_OPTION_EXTENDED, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - string = Z_STRVAL_PP(arg_str); - string_len = Z_STRLEN_PP(arg_str); - pos = 0; - err = 0; - /* churn through str, generating array entries as we go */ - while ((count != 0) && - (err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s)) >= 0) { - n = regs.beg[0]; - if (n == pos) { - /* match is at start of string, return empty string */ - add_next_index_stringl(return_value, empty_string, 0, 1); - } else { - /* On a real match */ - /* add it to the array */ - if (n < string_len) { - n -= pos; - add_next_index_stringl(return_value, &string[pos], n, 1); - } else { - err = -2; - break; - } - } - /* point at our new starting point */ - n = regs.end[0]; - if (pos < n) { - pos = n; - } else { - pos++; - } - /* if we're only looking for a certain number of points, - stop looking once we hit it */ - if (count > 0) { - count--; - } - } - - mbre_free_registers(®s); - - /* see if we encountered an error */ - if (err <= -2) { - php_error(E_WARNING, "mbregex search failure in mbsplit()"); - zval_dtor(return_value); - RETURN_FALSE; - } - - /* otherwise we just have one last element to add to the array */ - n = string_len - pos; - if (n > 0) { - add_next_index_stringl(return_value, &string[pos], n, 1); - } else { - add_next_index_stringl(return_value, empty_string, 0, 1); - } -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) - Regular expression match for multibyte string */ -PHP_FUNCTION(mb_ereg_match) -{ - zval **arg_pattern, **arg_str, **arg_option; - mb_regex_t re; - int option, err; - - option = MBRE_OPTION_EXTENDED; - switch (ZEND_NUM_ARGS()) { - case 2: - if (zend_get_parameters_ex(2, &arg_pattern, &arg_str) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 3: - if (zend_get_parameters_ex(3, &arg_pattern, &arg_str, &arg_option) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_option); - option = 0; - php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); - break; - default: - WRONG_PARAM_COUNT; - break; - } - - /* create regex pattern buffer */ - convert_to_string_ex(arg_pattern); - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - /* match */ - convert_to_string_ex(arg_str); - err = mbre_match(&re, Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), 0, NULL); - if (err >= 0) { - RETVAL_TRUE; - } else { - RETVAL_FALSE; - } -} -/* }}} */ - - -/* regex search */ -static void -php_mbereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) -{ - zval **arg_pattern, **arg_option; - int n, i, err, pos, len, beg, end, option; - unsigned char *str; - - option = MBRE_OPTION_EXTENDED; - switch (ZEND_NUM_ARGS()) { - case 0: - break; - case 1: - if (zend_get_parameters_ex(1, &arg_pattern) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 2: - if (zend_get_parameters_ex(2, &arg_pattern, &arg_option) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_option); - option = 0; - php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); - break; - default: - WRONG_PARAM_COUNT; - break; - } - if (ZEND_NUM_ARGS() > 0) { - /* create regex pattern buffer */ - convert_to_string_ex(arg_pattern); - if (!MBSTRG(search_re)) { - MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); - } - err = php_mbregex_compile_pattern( - MBSTRG(search_re), - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - efree(MBSTRG(search_re)); - MBSTRG(search_re) = (mb_regex_t*)0; - RETURN_FALSE; - } - } - - pos = MBSTRG(search_pos); - str = NULL; - len = 0; - if (Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING){ - str = Z_STRVAL_PP(MBSTRG(search_str)); - len = Z_STRLEN_PP(MBSTRG(search_str)); - } - - if (!MBSTRG(search_re)) { - php_error(E_WARNING, "no regex for search"); - RETURN_FALSE; - } - if (!str) { - php_error(E_WARNING, "no string for search"); - RETURN_FALSE; - } - if (MBSTRG(search_regs)) { - mbre_free_registers(MBSTRG(search_regs)); - memset(MBSTRG(search_regs), 0, sizeof(struct mbre_registers)); - } else { - MBSTRG(search_regs) = (struct mbre_registers*)ecalloc(1, sizeof(struct mbre_registers)); - } - - err = mbre_search(MBSTRG(search_re), str, len, pos, len - pos, MBSTRG(search_regs)); - - if (err <= -2) { - php_error(E_WARNING, "mbregex search failure in mbregex_search()"); - RETVAL_FALSE; - } else if (err < 0) { - MBSTRG(search_pos) = len; - RETVAL_FALSE; - } else { - switch (mode) { - case 1: - if (array_init(return_value) != FAILURE) { - beg = MBSTRG(search_regs)->beg[0]; - end = MBSTRG(search_regs)->end[0]; - add_next_index_long(return_value, beg); - add_next_index_long(return_value, end - beg); - } else { - RETVAL_FALSE; - } - break; - case 2: - if (array_init(return_value) != FAILURE) { - n = MBSTRG(search_regs)->num_regs; - for (i = 0; i < n; i++) { - beg = MBSTRG(search_regs)->beg[i]; - end = MBSTRG(search_regs)->end[i]; - if (beg >= 0 && beg <= end && end <= len) { - add_index_stringl(return_value, i, &str[beg], end - beg, 1); - } else { - add_index_bool(return_value, i, 0); - } - } - } else { - RETVAL_FALSE; - } - break; - default: - RETVAL_TRUE; - break; - } - end = MBSTRG(search_regs)->end[0]; - if (pos < end) { - MBSTRG(search_pos) = end; - } else { - MBSTRG(search_pos) = pos + 1; - } - } - - if (err < 0) { - mbre_free_registers(MBSTRG(search_regs)); - efree(MBSTRG(search_regs)); - MBSTRG(search_regs) = (struct mbre_registers*)0; - } -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search) -{ - php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search_pos) -{ - php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search_regs) -{ - php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) - Initialize string and regular expression for search. */ -PHP_FUNCTION(mb_ereg_search_init) -{ - zval **arg_str, **arg_pattern, **arg_option; - int err, option; - - option = MBRE_OPTION_EXTENDED; - switch (ZEND_NUM_ARGS()) { - case 1: - if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 2: - if (zend_get_parameters_ex(2, &arg_str, &arg_pattern) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 3: - if (zend_get_parameters_ex(3, &arg_str, &arg_pattern, &arg_option) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_option); - option = 0; - php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); - break; - default: - WRONG_PARAM_COUNT; - break; - } - if (ZEND_NUM_ARGS() > 1) { - /* create regex pattern buffer */ - convert_to_string_ex(arg_pattern); - if (!MBSTRG(search_re)) { - MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); - } - err = php_mbregex_compile_pattern( - MBSTRG(search_re), - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - efree(MBSTRG(search_re)); - MBSTRG(search_re) = (mb_regex_t*)0; - RETURN_FALSE; - } - } - - if (MBSTRG(search_str)) { - if (ZVAL_REFCOUNT(*MBSTRG(search_str)) > 1) { - ZVAL_DELREF(*MBSTRG(search_str)); - } else { - zval_dtor(*MBSTRG(search_str)); - FREE_ZVAL(*MBSTRG(search_str)); - } - MBSTRG(search_str) = (zval **)0; - MBSTRG(search_str_val) = (zval *)0; - } - - if (PZVAL_IS_REF(*arg_str)) { - ZVAL_ADDREF(*arg_str); - MBSTRG(search_str_val) = *arg_str; - MBSTRG(search_str) = &MBSTRG(search_str_val); - } else { - MAKE_STD_ZVAL(MBSTRG(search_str_val)); - *MBSTRG(search_str_val) = **arg_str; - zval_copy_ctor(MBSTRG(search_str_val)); - MBSTRG(search_str_val)->refcount = 1; - MBSTRG(search_str_val)->is_ref = 0; - MBSTRG(search_str) = &MBSTRG(search_str_val); - convert_to_string_ex(MBSTRG(search_str)); - } - - MBSTRG(search_pos) = 0; - - if (MBSTRG(search_regs)) { - mbre_free_registers(MBSTRG(search_regs)); - efree(MBSTRG(search_regs)); - MBSTRG(search_regs) = (struct mbre_registers*)0; - } - - RETURN_TRUE; -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_getregs(void) - Get matched substring of the last time */ -PHP_FUNCTION(mb_ereg_search_getregs) -{ - int n, i, len, beg, end; - unsigned char *str; - - if (MBSTRG(search_regs) && Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING && - Z_STRVAL_PP(MBSTRG(search_str)) && array_init(return_value) != FAILURE) { - str = Z_STRVAL_PP(MBSTRG(search_str)); - len = Z_STRLEN_PP(MBSTRG(search_str)); - n = MBSTRG(search_regs)->num_regs; - for (i = 0; i < n; i++) { - beg = MBSTRG(search_regs)->beg[i]; - end = MBSTRG(search_regs)->end[i]; - if (beg >= 0 && beg <= end && end <= len) { - add_index_stringl(return_value, i, &str[beg], end - beg, 1); - } else { - add_index_bool(return_value, i, 0); - } - } - } else { - RETVAL_FALSE; - } -} -/* }}} */ - - -/* {{{ proto int mb_ereg_search_getpos(void) - Get search start position */ -PHP_FUNCTION(mb_ereg_search_getpos) -{ - RETVAL_LONG(MBSTRG(search_pos)); -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search_setpos(int position) - Set search start position */ -PHP_FUNCTION(mb_ereg_search_setpos) -{ - zval **arg_pos; - int n; - - if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_pos) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_long_ex(arg_pos); - n = Z_LVAL_PP(arg_pos); - if (n < 0) { - php_error(E_WARNING, "position is minus value"); - MBSTRG(search_pos) = 0; - RETVAL_FALSE; - } else { - MBSTRG(search_pos) = n; - RETVAL_TRUE; - } -} -/* }}} */ - -#endif /* HAVE_MBREGEX */ |