diff options
Diffstat (limited to 'regex_internal.c')
-rw-r--r-- | regex_internal.c | 64 |
1 files changed, 39 insertions, 25 deletions
diff --git a/regex_internal.c b/regex_internal.c index 0c4f8f80..9e427081 100644 --- a/regex_internal.c +++ b/regex_internal.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ static void re_string_construct_common (const char *str, int len, re_string_t *pstr, @@ -246,13 +245,8 @@ build_wcs_buffer (re_string_t *pstr) else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + if (BE (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; @@ -261,6 +255,12 @@ build_wcs_buffer (re_string_t *pstr) wc = pstr->trans[wc]; pstr->cur_state = prev_st; } + else if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; @@ -320,12 +320,11 @@ build_wcs_upper_buffer (re_string_t *pstr) + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen + 2 > 2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb (buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -343,9 +342,11 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { - /* It is an invalid character or '\0'. Just use the byte. */ + /* It is an invalid character, an incomplete character + at the end of the string, or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. */ @@ -388,12 +389,11 @@ build_wcs_upper_buffer (re_string_t *pstr) mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); if (BE (mbclen + 2 > 2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -458,7 +458,8 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { /* It is an invalid character or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; @@ -505,7 +506,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) rawbuf_idx < new_raw_idx;) { wchar_t wc2; - int remain_len = pstr->len - rawbuf_idx; + int remain_len = pstr->raw_len - rawbuf_idx; prev_st = pstr->cur_state; mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, remain_len, &pstr->cur_state); @@ -685,7 +686,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) pstr->valid_len - offset); pstr->valid_len -= offset; pstr->valid_raw_len -= offset; -#if DEBUG +#if defined DEBUG && DEBUG assert (pstr->valid_len > 0); #endif } @@ -741,16 +742,18 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) unsigned char buf[6]; size_t mbclen; + const unsigned char *pp = p; if (BE (pstr->trans != NULL, 0)) { int i = mlen < 6 ? mlen : 6; while (--i >= 0) buf[i] = pstr->trans[p[i]]; + pp = buf; } /* XXX Don't use mbrtowc, we know which conversion to use (UTF-8 -> UCS4). */ memset (&cur_state, 0, sizeof (cur_state)); - mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + mbclen = __mbrtowc (&wc2, (const char *) pp, mlen, &cur_state); if (raw + offset - p <= mbclen && mbclen < (size_t) -2) @@ -940,7 +943,7 @@ re_string_context_at (const re_string_t *input, int idx, int eflags) int wc_idx = idx; while(input->wcs[wc_idx] == WEOF) { -#ifdef DEBUG +#if defined DEBUG && DEBUG /* It must not happen. */ assert (wc_idx >= 0); #endif @@ -1446,7 +1449,18 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); if (BE (new_nexts == NULL || new_indices == NULL || new_edests == NULL || new_eclosures == NULL, 0)) - return -1; + { + /* if any are not NULL, free them, avoid leaks */ + if (new_nexts != NULL) + re_free(new_nexts); + if (new_indices != NULL) + re_free(new_indices); + if (new_edests != NULL) + re_free(new_edests); + if (new_eclosures != NULL) + re_free(new_eclosures); + return -1; + } dfa->nexts = new_nexts; dfa->org_indices = new_indices; dfa->edests = new_edests; |