summaryrefslogtreecommitdiff
path: root/subst.c
diff options
context:
space:
mode:
authorChet Ramey <chet.ramey@case.edu>2011-12-03 12:53:09 -0500
committerChet Ramey <chet.ramey@case.edu>2011-12-03 12:53:09 -0500
commit545f34cfd4e62689f390814b4413aa7c78f85faf (patch)
treef6763ea3d6ce54967f02309e80f9752ddbfd2875 /subst.c
parent5e13499c55639e93fbe46ce3dc053d74e5578cf9 (diff)
downloadbash-545f34cfd4e62689f390814b4413aa7c78f85faf.tar.gz
commit bash-20040108 snapshot
Diffstat (limited to 'subst.c')
-rw-r--r--subst.c265
1 files changed, 198 insertions, 67 deletions
diff --git a/subst.c b/subst.c
index 1e935797..a43a72f2 100644
--- a/subst.c
+++ b/subst.c
@@ -4,7 +4,7 @@
/* ``Have a little faith, there's magic in the night. You ain't a
beauty, but, hey, you're alright.'' */
-/* Copyright (C) 1987-2003 Free Software Foundation, Inc.
+/* Copyright (C) 1987-2004 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
@@ -210,6 +210,8 @@ static char *extract_dollar_brace_string __P((char *, int *, int, int));
static char *pos_params __P((char *, int, int, int));
+static unsigned char *mb_getcharlens __P((char *, int));
+
static char *remove_pattern __P((char *, char *, int));
static int match_pattern_char __P((char *, char *));
static int match_pattern __P((char *, char *, int, char **, char **));
@@ -245,6 +247,7 @@ static intmax_t parameter_brace_expand_length __P((char *));
static char *skiparith __P((char *, int));
static int verify_substring_values __P((char *, char *, int, intmax_t *, intmax_t *));
static int get_var_and_type __P((char *, char *, SHELL_VAR **, char **));
+static char *mb_substring __P((char *, int, int));
static char *parameter_brace_substring __P((char *, char *, char *, int));
static char *pos_params_pat_subst __P((char *, char *, char *, int));
@@ -2836,6 +2839,31 @@ word_list_remove_quoted_nulls (list)
/* */
/* **************************************************************** */
+#if defined (HANDLE_MULTIBYTE)
+/* Return a newly allocated array of LEN bytes describing the character
+   boundaries of STRING.  The entry at the byte offset where a (possibly
+   multibyte) character begins holds the number of bytes ADVANCE_CHAR
+   stepped over for that character; every other entry is 0.  The array
+   comes from xmalloc and is handed to the caller. */
+static unsigned char *
+mb_getcharlens (string, len)
+     char *string;
+     int len;
+{
+  int offset, last;
+  unsigned char *ret;
+  DECLARE_MBSTATE;
+
+  offset = last = 0;
+  ret = (unsigned char *)xmalloc (len);
+  memset (ret, 0, len);
+  /* LAST indexes both STRING and RET and is assigned from OFFSET, so it
+     must be as wide as OFFSET.  A narrower type (e.g. unsigned char) wraps
+     around once the string is longer than 255 bytes. */
+  while (string[last])
+    {
+      ADVANCE_CHAR (string, len, offset);
+      ret[last] = offset - last;	/* byte length of the char starting at LAST */
+      last = offset;
+    }
+  return ret;
+}
+#endif
+
/* Remove the portion of PARAM matched by PATTERN according to OP, where OP
can have one of 4 values:
RP_LONG_LEFT remove longest matching portion at start of PARAM
@@ -2857,6 +2885,9 @@ remove_pattern (param, pattern, op)
register int len;
register char *end;
register char *p, *ret, c;
+ int offset;
+ unsigned char *mblen;
+ DECLARE_MBSTATE;
if (param == NULL)
return (param);
@@ -2866,25 +2897,42 @@ remove_pattern (param, pattern, op)
len = STRLEN (param);
end = param + len;
+ mblen = (unsigned char *)0;
+#if defined (HANDLE_MULTIBYTE)
+ if (MB_CUR_MAX > 1 && (op == RP_LONG_LEFT || op == RP_SHORT_RIGHT))
+ mblen = mb_getcharlens (param, len);
+#endif
+
switch (op)
{
case RP_LONG_LEFT: /* remove longest match at start */
- /* BACKUP_CHAR_P (param, len, p); */
- for (p = end; p >= param; p--)
+ p = end;
+ while (p >= param)
{
c = *p; *p = '\0';
if (strmatch (pattern, param, FNMATCH_EXTFLAG) != FNM_NOMATCH)
{
*p = c;
+ FREE (mblen);
return (savestring (p));
}
*p = c;
+
+ if (MB_CUR_MAX > 1)
+ {
+ while (p >= param)
+ if (mblen[--p - param])
+ break;
+ }
+ else
+ p--;
}
break;
case RP_SHORT_LEFT: /* remove shortest match at start */
- /* ADVANCE_CHAR_P (p, end - p),p++ */
- for (p = param; p <= end; p++)
+ p = param;
+ offset = 0;
+ while (p <= end)
{
c = *p; *p = '\0';
if (strmatch (pattern, param, FNMATCH_EXTFLAG) != FNM_NOMATCH)
@@ -2893,12 +2941,21 @@ remove_pattern (param, pattern, op)
return (savestring (p));
}
*p = c;
+
+ if (MB_CUR_MAX > 1)
+ {
+ ADVANCE_CHAR (param, len, offset);
+ p = param + offset;
+ }
+ else
+ p++;
}
break;
case RP_LONG_RIGHT: /* remove longest match at end */
- /* ADVANCE_CHAR_P (p, end - p),p++ */
- for (p = param; p <= end; p++)
+ p = param;
+ offset = 0;
+ while (p <= end)
{
if (strmatch (pattern, p, FNMATCH_EXTFLAG) != FNM_NOMATCH)
{
@@ -2907,23 +2964,43 @@ remove_pattern (param, pattern, op)
*p = c;
return (ret);
}
+
+ if (MB_CUR_MAX > 1)
+ {
+ ADVANCE_CHAR (param, len, offset);
+ p = param + offset;
+ }
+ else
+ p++;
}
break;
case RP_SHORT_RIGHT: /* remove shortest match at end */
- /* BACKUP_CHAR_P (param, len, p); */
- for (p = end; p >= param; p--)
+ p = end;
+ while (p >= param)
{
if (strmatch (pattern, p, FNMATCH_EXTFLAG) != FNM_NOMATCH)
{
c = *p; *p = '\0';
ret = savestring (param);
*p = c;
+ FREE (mblen);
return (ret);
}
+
+ if (MB_CUR_MAX > 1)
+ {
+ while (p >= param)
+ if (mblen[--p - param])
+ break;
+ }
+ else
+ p--;
}
break;
}
+
+ FREE (mblen);
return (savestring (param)); /* no match, return original string */
}
@@ -2972,6 +3049,9 @@ match_pattern (string, pat, mtype, sp, ep)
int c, len;
register char *p, *p1;
char *end;
+ int offset;
+ unsigned char *mblen;
+ DECLARE_MBSTATE;
if (string == 0 || *string == 0 || pat == 0 || *pat == 0)
return (0);
@@ -2979,16 +3059,23 @@ match_pattern (string, pat, mtype, sp, ep)
len = STRLEN (string);
end = string + len;
+ mblen = (unsigned char *)0;
+#if defined (HANDLE_MULTIBYTE)
+ if (MB_CUR_MAX > 1 && (mtype == MATCH_ANY || mtype == MATCH_BEG))
+ mblen = mb_getcharlens (string, len);
+#endif
+
switch (mtype)
{
case MATCH_ANY:
- /* ADVANCE_CHAR_P (p, end - p),p++ */
- for (p = string; p <= end; p++)
+ p = string;
+ offset = 0;
+ while (p <= end)
{
if (match_pattern_char (pat, p))
{
- /* BACKUP_CHAR_P (p, end - p, p1) */
- for (p1 = end; p1 >= p; p1--)
+ p1 = end;
+ while (p1 >= p)
{
c = *p1; *p1 = '\0';
if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0)
@@ -2996,19 +3083,40 @@ match_pattern (string, pat, mtype, sp, ep)
*p1 = c;
*sp = p;
*ep = p1;
+ FREE (mblen);
return 1;
}
*p1 = c;
+
+ if (MB_CUR_MAX > 1)
+ {
+ while (p1 >= p)
+ if (mblen[--p1 - string])
+ break;
+ }
+ else
+ p1--;
}
}
+
+ if (MB_CUR_MAX > 1)
+ {
+ ADVANCE_CHAR (string, len, offset);
+ p = string + offset;
+ }
+ else
+ p++;
}
+
+ FREE (mblen);
return (0);
case MATCH_BEG:
if (match_pattern_char (pat, string) == 0)
return (0);
- /* BACKUP_CHAR_P (string, len, p) */
- for (p = end; p >= string; p--)
+
+ p = end;
+ while (p >= string)
{
c = *p; *p = '\0';
if (strmatch (pat, string, FNMATCH_EXTFLAG) == 0)
@@ -3016,24 +3124,48 @@ match_pattern (string, pat, mtype, sp, ep)
*p = c;
*sp = string;
*ep = p;
+ FREE (mblen);
return 1;
}
*p = c;
+
+ if (MB_CUR_MAX > 1)
+ {
+ while (p >= string)
+ if (mblen[--p - string])
+ break;
+ }
+ else
+ p--;
}
+
+ FREE (mblen);
return (0);
case MATCH_END:
- /* ADVANCE_CHAR_P(p, end - p), p++ */
- for (p = string; p <= end; p++)
- if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0)
- {
- *sp = p;
- *ep = end;
- return 1;
- }
+ p = string;
+ offset = 0;
+ while (p <= end)
+ {
+ if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0)
+ {
+ *sp = p;
+ *ep = end;
+ return 1;
+ }
+
+ if (MB_CUR_MAX > 1)
+ {
+ ADVANCE_CHAR (string, len, offset);
+ p = string + offset;
+ }
+ else
+ p++;
+ }
return (0);
}
+ FREE (mblen);
return (0);
}
@@ -4622,6 +4754,35 @@ get_var_and_type (varname, value, varp, valp)
/* */
/******************************************************/
+#if defined (HANDLE_MULTIBYTE)
+/* Character-oriented counterpart of substring().  S and E are not byte
+   indices into STRING; they are character positions (each character may
+   occupy several bytes), so the matching byte offsets are found by walking
+   STRING one character at a time.  The walk stops early if the end of
+   STRING is reached.
+   Used by the ${param:offset[:length]} expansion. */
+static char *
+mb_substring (string, s, e)
+     char *string;
+     int s, e;
+{
+  int first, past, remaining, nbytes;
+  DECLARE_MBSTATE;
+
+  nbytes = STRLEN (string);
+
+  /* Step over the first S characters to find the starting byte offset. */
+  first = 0;
+  for (remaining = s; string[first] && remaining--; )
+    {
+      ADVANCE_CHAR (string, nbytes, first);
+    }
+
+  /* From there, step over E - S more characters to find the end offset. */
+  past = first;
+  for (remaining = e - s; string[past] && remaining--; )
+    {
+      ADVANCE_CHAR (string, nbytes, past);
+    }
+
+  return substring (string, first, past);
+}
+#endif
+
/* Process a variable substring expansion: ${name:e1[:e2]}. If VARNAME
is `@', use the positional parameters; otherwise, use the value of
VARNAME. If VARNAME is an array variable, use the array elements. */
@@ -4656,7 +4817,13 @@ parameter_brace_substring (varname, value, substr, quoted)
{
case VT_VARIABLE:
case VT_ARRAYMEMBER:
+#if defined (HANDLE_MULTIBYTE)
+ if (MB_CUR_MAX > 1)
+ tt = mb_substring (val, e1, e2);
+ else
+#endif
tt = substring (val, e1, e2);
+
if (vtype == VT_VARIABLE)
FREE (val);
if (quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT))
@@ -5747,26 +5914,7 @@ expand_word_internal (word, quoted, isexp, contains_dollar_at, expanded_somethin
#if HANDLE_MULTIBYTE
if (MB_CUR_MAX > 1 && string[sindex])
{
- int i;
- mbstate_t state_bak;
- size_t mblength;
-
- state_bak = state;
- mblength = mbrlen (string + sindex, string_size - sindex, &state);
- if (MB_INVALIDCH (mblength))
- {
- state = state_bak;
- mblength = 1;
- }
- if (mblength < 1)
- mblength = 1;
- temp = (char *)xmalloc (mblength + 2);
- temp[0] = CTLESC;
- for (i = 0; i < mblength; i++)
- temp[i+1] = string[sindex++];
- temp[mblength + 1] = '\0';
-
- goto add_string;
+ SADD_MBQCHAR_BODY(temp, string, sindex, string_size);
}
else
#endif
@@ -6014,7 +6162,9 @@ add_twochars:
/* HOWEVER, this fails if the string contains a literal
CTLNUL or CTLNUL is contained in the (non-null) expansion
of some variable. I'm not sure what to do about this
- yet. */
+ yet. There has to be some way to indicate the difference
+ between the two. An auxiliary data structure might be
+ necessary. */
if (QUOTED_NULL (temp) == 0)
remove_quoted_nulls (temp); /* XXX */
#endif
@@ -6104,31 +6254,12 @@ add_twochars:
else
{
#if HANDLE_MULTIBYTE
- /* XXX - I'd like to use SCOPY_CHAR_I here. */
+ if (MB_CUR_MAX > 1)
+ sindex--;
+
if (MB_CUR_MAX > 1)
{
- int i;
- mbstate_t state_bak;
- size_t mblength;
-
- sindex--;
- state_bak = state;
- mblength = mbrlen (string + sindex, string_size - sindex, &state);
- if (MB_INVALIDCH (mblength))
- {
- state = state_bak;
- mblength = 1;
- }
- if (mblength < 1)
- mblength = 1;
-
- temp = (char *)xmalloc (mblength + 2);
- temp[0] = CTLESC;
- for (i = 0; i < mblength; i++)
- temp[i + 1] = string[sindex++];
- temp[mblength + 1] = '\0';
-
- goto add_string;
+ SADD_MBQCHAR_BODY(temp, string, sindex, string_size);
}
else
#endif