summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2005-07-18 07:38:48 +0000
committerPaolo Bonzini <bonzini@gnu.org>2008-01-09 16:11:54 +0100
commitbdbfe6d711a2047b0d91b85cc6b5fdf49042ff6e (patch)
tree7e534ab13ed84a4b4b813ed4d93b879d74ed1800 /lib
parent584e5cedc208da4ab1f62347f6fb342bfc6dcf4c (diff)
downloadsed-bdbfe6d711a2047b0d91b85cc6b5fdf49042ff6e.tar.gz
merge regex with glibc and adjust doc/.arch-inventory
git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-51
Diffstat (limited to 'lib')
-rw-r--r--lib/regcomp.c22
-rw-r--r--lib/regex.c4
-rw-r--r--lib/regex_.h4
-rw-r--r--lib/regex_internal.c46
-rw-r--r--lib/regex_internal.h16
-rw-r--r--lib/regexec.c24
6 files changed, 74 insertions, 42 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c
index dd11bb3..37e0679 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -15,8 +15,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
int length, reg_syntax_t syntax);
@@ -359,7 +359,8 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
memset (&state, 0, sizeof (state));
if (mbrtowc (&wc, (const char *) buf, p - buf,
&state) == p - buf
- && __wcrtomb ((char *) buf, towlower (wc), &state) > 0)
+ && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1))
re_set_fastmap (fastmap, 0, buf[0]);
}
#endif
@@ -409,12 +410,13 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
char buf[256];
mbstate_t state;
memset (&state, '\0', sizeof (state));
- __wcrtomb (buf, cset->mbchars[i], &state);
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
- __wcrtomb (buf, towlower (cset->mbchars[i]), &state);
- re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
}
}
}
@@ -785,6 +787,8 @@ re_compile_internal (preg, pattern, length, syntax)
strncpy (dfa->re_str, pattern, length + 1);
#endif
+ __libc_lock_init (dfa->lock);
+
err = re_string_construct (&regexp, pattern, length, preg->translate,
syntax & RE_ICASE, dfa);
if (BE (err != REG_NOERROR, 0))
@@ -1322,10 +1326,10 @@ lower_subexp (err, preg, node)
if (preg->no_sub
/* We do not optimize empty subexpressions, because otherwise we may
- have bad CONCAT nodes with NULL children. This is obviously not
+ have bad CONCAT nodes with NULL children. This is obviously not
very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */
- && node->left
+ && node->left != NULL
&& (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
|| !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
return node->left;
diff --git a/lib/regex.c b/lib/regex.c
index 60b7300..7a4f304 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -15,8 +15,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
#ifdef HAVE_CONFIG_H
#include "config.h"
diff --git a/lib/regex_.h b/lib/regex_.h
index 99e7214..b2d9a62 100644
--- a/lib/regex_.h
+++ b/lib/regex_.h
@@ -16,8 +16,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
#ifndef _REGEX_H
#define _REGEX_H 1
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index fd6ebe8..baa5844 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -15,8 +15,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
@@ -214,12 +214,14 @@ build_wcs_buffer (pstr)
re_string_t *pstr;
{
#ifdef _LIBC
- unsigned char buf[pstr->mb_cur_max];
+ unsigned char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
unsigned char buf[64];
#endif
mbstate_t prev_st;
- int byte_idx, end_idx, mbclen, remain_len;
+ int byte_idx, end_idx, remain_len;
+ size_t mbclen;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
@@ -280,11 +282,13 @@ build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
mbstate_t prev_st;
- int src_idx, byte_idx, end_idx, mbclen, remain_len;
+ int src_idx, byte_idx, end_idx, remain_len;
+ size_t mbclen;
#ifdef _LIBC
- unsigned char buf[pstr->mb_cur_max];
+ char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
- unsigned char buf[64];
+ char buf[64];
#endif
byte_idx = pstr->valid_len;
@@ -316,12 +320,12 @@ build_wcs_upper_buffer (pstr)
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
- if (BE (mbclen > 0, 1))
+ if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
- int mbcdlen;
+ size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
@@ -384,20 +388,20 @@ build_wcs_upper_buffer (pstr)
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
- if (BE (mbclen > 0, 1))
+ if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
- int mbcdlen;
+ size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
- else
+ else if (mbcdlen != (size_t) -1)
{
- int i;
+ size_t i;
if (byte_idx + mbcdlen > pstr->bufs_len)
{
@@ -414,7 +418,7 @@ build_wcs_upper_buffer (pstr)
}
if (!pstr->offsets_needed)
{
- for (i = 0; i < byte_idx; ++i)
+ for (i = 0; i < (size_t) byte_idx; ++i)
pstr->offsets[i] = i;
pstr->offsets_needed = 1;
}
@@ -437,13 +441,15 @@ build_wcs_upper_buffer (pstr)
src_idx += mbclen;
continue;
}
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
}
else
memcpy (pstr->mbs + byte_idx, p, mbclen);
if (BE (pstr->offsets_needed != 0, 0))
{
- int i;
+ size_t i;
for (i = 0; i < mbclen; ++i)
pstr->offsets[byte_idx + i] = src_idx + i;
}
@@ -494,7 +500,8 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc)
wint_t *last_wc;
{
mbstate_t prev_st;
- int rawbuf_idx, mbclen;
+ int rawbuf_idx;
+ size_t mbclen;
wchar_t wc = 0;
/* Skip the characters which are not necessary to check. */
@@ -666,8 +673,9 @@ re_string_reconstruct (pstr, idx, eflags)
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
- mlen = mbrtowc (&wc2, p, mlen, &cur_state)
- - (raw + offset - p);
+ mlen = (mbrtowc (&wc2, (const char *) p, mlen,
+ &cur_state)
+ - (raw + offset - p));
if (mlen >= 0)
{
memset (&pstr->cur_state, '\0',
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 0a55cf3..6db3844 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -15,8 +15,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
#ifndef _REGEX_INTERNAL_H
#define _REGEX_INTERNAL_H 1
@@ -39,6 +39,14 @@
#if defined HAVE_WCTYPE_H || defined _LIBC
# include <wctype.h>
#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
/* In case that the system doesn't have isblank(). */
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
@@ -60,7 +68,7 @@
# ifdef _LIBC
# undef gettext
# define gettext(msgid) \
- INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
+ INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
@@ -629,7 +637,6 @@ struct re_dfa_t
int nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
- unsigned int empty_bkref_map;
unsigned int used_bkref_map;
unsigned int completed_bkref_map;
@@ -648,6 +655,7 @@ struct re_dfa_t
#ifdef DEBUG
char* re_str;
#endif
+ __libc_lock_define (, lock)
};
#ifndef RE_NO_INTERNAL_PROTOTYPES
diff --git a/lib/regexec.c b/lib/regexec.c
index b077c20..e635261 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -15,8 +15,8 @@
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA. */
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
int n) internal_function;
@@ -219,6 +219,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
{
reg_errcode_t err;
int start, length;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
return REG_BADPAT;
@@ -233,12 +234,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
start = 0;
length = strlen (string);
}
+
+ __libc_lock_lock (dfa->lock);
if (preg->no_sub)
err = re_search_internal (preg, string, length, start, length - start,
length, 0, NULL, eflags);
else
err = re_search_internal (preg, string, length, start, length - start,
length, nmatch, pmatch, eflags);
+ __libc_lock_unlock (dfa->lock);
return err != REG_NOERROR;
}
@@ -402,6 +406,7 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
regmatch_t *pmatch;
int nregs, rval;
int eflags = 0;
+ re_dfa_t *dfa = (re_dfa_t *)bufp->buffer;
/* Check for out-of-range. */
if (BE (start < 0 || start > length, 0))
@@ -411,6 +416,8 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
else if (BE (start + range < 0, 0))
range = -start;
+ __libc_lock_lock (dfa->lock);
+
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
@@ -439,7 +446,10 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
nregs = bufp->re_nsub + 1;
pmatch = re_malloc (regmatch_t, nregs);
if (BE (pmatch == NULL, 0))
- return -2;
+ {
+ rval = -2;
+ goto out;
+ }
result = re_search_internal (bufp, string, length, start, range, stop,
nregs, pmatch, eflags);
@@ -469,6 +479,8 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
rval = pmatch[0].rm_so;
}
re_free (pmatch);
+ out:
+ __libc_lock_unlock (dfa->lock);
return rval;
}
@@ -3781,8 +3793,8 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx)
{
const re_charset_t *cset = node->opr.mbcset;
# ifdef _LIBC
- const unsigned char *pin = ((char *) re_string_get_buffer (input)
- + str_idx);
+ const unsigned char *pin
+ = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
int j;
uint32_t nrules;
# endif /* _LIBC */