summaryrefslogtreecommitdiff
path: root/regex_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'regex_internal.c')
-rw-r--r--regex_internal.c64
1 files changed, 39 insertions, 25 deletions
diff --git a/regex_internal.c b/regex_internal.c
index 0c4f8f80..9e427081 100644
--- a/regex_internal.c
+++ b/regex_internal.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2002-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
@@ -246,13 +245,8 @@ build_wcs_buffer (re_string_t *pstr)
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
- if (BE (mbclen == (size_t) -2, 0))
- {
- /* The buffer doesn't have enough space, finish to build. */
- pstr->cur_state = prev_st;
- break;
- }
- else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ if (BE (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1;
@@ -261,6 +255,12 @@ build_wcs_buffer (re_string_t *pstr)
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
+ else if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
@@ -320,12 +320,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
- wchar_t wcu = wc;
- if (iswlower (wc))
+ wchar_t wcu = towupper (wc);
+ if (wcu != wc)
{
size_t mbcdlen;
- wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -343,9 +342,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
- else if (mbclen == (size_t) -1 || mbclen == 0)
+ else if (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
- /* It is an invalid character or '\0'. Just use the byte. */
+ /* It is an invalid character, an incomplete character
+ at the end of the string, or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
@@ -388,12 +389,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
- wchar_t wcu = wc;
- if (iswlower (wc))
+ wchar_t wcu = towupper (wc);
+ if (wcu != wc)
{
size_t mbcdlen;
- wcu = towupper (wc);
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -458,7 +458,8 @@ build_wcs_upper_buffer (re_string_t *pstr)
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
- else if (mbclen == (size_t) -1 || mbclen == 0)
+ else if (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
@@ -505,7 +506,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
rawbuf_idx < new_raw_idx;)
{
wchar_t wc2;
- int remain_len = pstr->len - rawbuf_idx;
+ int remain_len = pstr->raw_len - rawbuf_idx;
prev_st = pstr->cur_state;
mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
remain_len, &pstr->cur_state);
@@ -685,7 +686,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
-#if DEBUG
+#if defined DEBUG && DEBUG
assert (pstr->valid_len > 0);
#endif
}
@@ -741,16 +742,18 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
unsigned char buf[6];
size_t mbclen;
+ const unsigned char *pp = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
+ pp = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
- mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
+ mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
&cur_state);
if (raw + offset - p <= mbclen
&& mbclen < (size_t) -2)
@@ -940,7 +943,7 @@ re_string_context_at (const re_string_t *input, int idx, int eflags)
int wc_idx = idx;
while(input->wcs[wc_idx] == WEOF)
{
-#ifdef DEBUG
+#if defined DEBUG && DEBUG
/* It must not happen. */
assert (wc_idx >= 0);
#endif
@@ -1446,7 +1449,18 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
if (BE (new_nexts == NULL || new_indices == NULL
|| new_edests == NULL || new_eclosures == NULL, 0))
- return -1;
+ {
+ /* if any are not NULL, free them, avoid leaks */
+ if (new_nexts != NULL)
+ re_free(new_nexts);
+ if (new_indices != NULL)
+ re_free(new_indices);
+ if (new_edests != NULL)
+ re_free(new_edests);
+ if (new_eclosures != NULL)
+ re_free(new_eclosures);
+ return -1;
+ }
dfa->nexts = new_nexts;
dfa->org_indices = new_indices;
dfa->edests = new_edests;