diff options
author | Chet Ramey <chet.ramey@case.edu> | 2011-12-03 12:53:09 -0500 |
---|---|---|
committer | Chet Ramey <chet.ramey@case.edu> | 2011-12-03 12:53:09 -0500 |
commit | 545f34cfd4e62689f390814b4413aa7c78f85faf (patch) | |
tree | f6763ea3d6ce54967f02309e80f9752ddbfd2875 /subst.c | |
parent | 5e13499c55639e93fbe46ce3dc053d74e5578cf9 (diff) | |
download | bash-545f34cfd4e62689f390814b4413aa7c78f85faf.tar.gz |
commit bash-20040108 snapshot
Diffstat (limited to 'subst.c')
-rw-r--r-- | subst.c | 265 |
1 files changed, 198 insertions, 67 deletions
@@ -4,7 +4,7 @@ /* ``Have a little faith, there's magic in the night. You ain't a beauty, but, hey, you're alright.'' */ -/* Copyright (C) 1987-2003 Free Software Foundation, Inc. +/* Copyright (C) 1987-2004 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -210,6 +210,8 @@ static char *extract_dollar_brace_string __P((char *, int *, int, int)); static char *pos_params __P((char *, int, int, int)); +static unsigned char *mb_getcharlens __P((char *, int)); + static char *remove_pattern __P((char *, char *, int)); static int match_pattern_char __P((char *, char *)); static int match_pattern __P((char *, char *, int, char **, char **)); @@ -245,6 +247,7 @@ static intmax_t parameter_brace_expand_length __P((char *)); static char *skiparith __P((char *, int)); static int verify_substring_values __P((char *, char *, int, intmax_t *, intmax_t *)); static int get_var_and_type __P((char *, char *, SHELL_VAR **, char **)); +static char *mb_substring __P((char *, int, int)); static char *parameter_brace_substring __P((char *, char *, char *, int)); static char *pos_params_pat_subst __P((char *, char *, char *, int)); @@ -2836,6 +2839,31 @@ word_list_remove_quoted_nulls (list) /* */ /* **************************************************************** */ +#if defined (HANDLE_MULTIBYTE) +static unsigned char * +mb_getcharlens (string, len) + char *string; + int len; +{ + int i, offset; + unsigned char last, *ret; + char *p; + DECLARE_MBSTATE; + + i = offset = 0; + last = 0; + ret = (unsigned char *)xmalloc (len); + memset (ret, 0, len); + while (string[last]) + { + ADVANCE_CHAR (string, len, offset); + ret[last] = offset - last; + last = offset; + } + return ret; +} +#endif + /* Remove the portion of PARAM matched by PATTERN according to OP, where OP can have one of 4 values: RP_LONG_LEFT remove longest matching portion at start of PARAM @@ -2857,6 +2885,9 @@ remove_pattern (param, pattern, op) register int len; register char *end; register 
char *p, *ret, c; + int offset; + unsigned char *mblen; + DECLARE_MBSTATE; if (param == NULL) return (param); @@ -2866,25 +2897,42 @@ remove_pattern (param, pattern, op) len = STRLEN (param); end = param + len; + mblen = (unsigned char *)0; +#if defined (HANDLE_MULTIBYTE) + if (MB_CUR_MAX > 1 && (op == RP_LONG_LEFT || op == RP_SHORT_RIGHT)) + mblen = mb_getcharlens (param, len); +#endif + switch (op) { case RP_LONG_LEFT: /* remove longest match at start */ - /* BACKUP_CHAR_P (param, len, p); */ - for (p = end; p >= param; p--) + p = end; + while (p >= param) { c = *p; *p = '\0'; if (strmatch (pattern, param, FNMATCH_EXTFLAG) != FNM_NOMATCH) { *p = c; + FREE (mblen); return (savestring (p)); } *p = c; + + if (MB_CUR_MAX > 1) + { + while (p >= param) + if (mblen[--p - param]) + break; + } + else + p--; } break; case RP_SHORT_LEFT: /* remove shortest match at start */ - /* ADVANCE_CHAR_P (p, end - p),p++ */ - for (p = param; p <= end; p++) + p = param; + offset = 0; + while (p <= end) { c = *p; *p = '\0'; if (strmatch (pattern, param, FNMATCH_EXTFLAG) != FNM_NOMATCH) @@ -2893,12 +2941,21 @@ remove_pattern (param, pattern, op) return (savestring (p)); } *p = c; + + if (MB_CUR_MAX > 1) + { + ADVANCE_CHAR (param, len, offset); + p = param + offset; + } + else + p++; } break; case RP_LONG_RIGHT: /* remove longest match at end */ - /* ADVANCE_CHAR_P (p, end - p),p++ */ - for (p = param; p <= end; p++) + p = param; + offset = 0; + while (p <= end) { if (strmatch (pattern, p, FNMATCH_EXTFLAG) != FNM_NOMATCH) { @@ -2907,23 +2964,43 @@ remove_pattern (param, pattern, op) *p = c; return (ret); } + + if (MB_CUR_MAX > 1) + { + ADVANCE_CHAR (param, len, offset); + p = param + offset; + } + else + p++; } break; case RP_SHORT_RIGHT: /* remove shortest match at end */ - /* BACKUP_CHAR_P (param, len, p); */ - for (p = end; p >= param; p--) + p = end; + while (p >= param) { if (strmatch (pattern, p, FNMATCH_EXTFLAG) != FNM_NOMATCH) { c = *p; *p = '\0'; ret = savestring (param); *p = c; 
+ FREE (mblen); return (ret); } + + if (MB_CUR_MAX > 1) + { + while (p >= param) + if (mblen[--p - param]) + break; + } + else + p--; } break; } + + FREE (mblen); return (savestring (param)); /* no match, return original string */ } @@ -2972,6 +3049,9 @@ match_pattern (string, pat, mtype, sp, ep) int c, len; register char *p, *p1; char *end; + int offset; + unsigned char *mblen; + DECLARE_MBSTATE; if (string == 0 || *string == 0 || pat == 0 || *pat == 0) return (0); @@ -2979,16 +3059,23 @@ match_pattern (string, pat, mtype, sp, ep) len = STRLEN (string); end = string + len; + mblen = (unsigned char *)0; +#if defined (HANDLE_MULTIBYTE) + if (MB_CUR_MAX > 1 && (mtype == MATCH_ANY || mtype == MATCH_BEG)) + mblen = mb_getcharlens (string, len); +#endif + switch (mtype) { case MATCH_ANY: - /* ADVANCE_CHAR_P (p, end - p),p++ */ - for (p = string; p <= end; p++) + p = string; + offset = 0; + while (p <= end) { if (match_pattern_char (pat, p)) { - /* BACKUP_CHAR_P (p, end - p, p1) */ - for (p1 = end; p1 >= p; p1--) + p1 = end; + while (p1 >= p) { c = *p1; *p1 = '\0'; if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0) @@ -2996,19 +3083,40 @@ match_pattern (string, pat, mtype, sp, ep) *p1 = c; *sp = p; *ep = p1; + FREE (mblen); return 1; } *p1 = c; + + if (MB_CUR_MAX > 1) + { + while (p1 >= p) + if (mblen[--p1 - string]) + break; + } + else + p1--; } } + + if (MB_CUR_MAX > 1) + { + ADVANCE_CHAR (string, len, offset); + p = string + offset; + } + else + p++; } + + FREE (mblen); return (0); case MATCH_BEG: if (match_pattern_char (pat, string) == 0) return (0); - /* BACKUP_CHAR_P (string, len, p) */ - for (p = end; p >= string; p--) + + p = end; + while (p >= string) { c = *p; *p = '\0'; if (strmatch (pat, string, FNMATCH_EXTFLAG) == 0) @@ -3016,24 +3124,48 @@ match_pattern (string, pat, mtype, sp, ep) *p = c; *sp = string; *ep = p; + FREE (mblen); return 1; } *p = c; + + if (MB_CUR_MAX > 1) + { + while (p >= string) + if (mblen[--p - string]) + break; + } + else + p--; } + + FREE 
(mblen); return (0); case MATCH_END: - /* ADVANCE_CHAR_P(p, end - p), p++ */ - for (p = string; p <= end; p++) - if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0) - { - *sp = p; - *ep = end; - return 1; - } + p = string; + offset = 0; + while (p <= end) + { + if (strmatch (pat, p, FNMATCH_EXTFLAG) == 0) + { + *sp = p; + *ep = end; + return 1; + } + + if (MB_CUR_MAX > 1) + { + ADVANCE_CHAR (string, len, offset); + p = string + offset; + } + else + p++; + } return (0); } + FREE (mblen); return (0); } @@ -4622,6 +4754,35 @@ get_var_and_type (varname, value, varp, valp) /* */ /******************************************************/ +#if defined (HANDLE_MULTIBYTE) +/* Character-oriented rather than strictly byte-oriented substrings. S and + E, rather than being strict indices into STRING, indicate character (possibly + multibyte character) positions that require calculation. + Used by the ${param:offset[:length]} expansion. */ +static char * +mb_substring (string, s, e) + char *string; + int s, e; +{ + char *tt; + int start, stop, i, slen; + DECLARE_MBSTATE; + + start = 0; + slen = STRLEN (string); + + i = s; + while (string[start] && i--) + ADVANCE_CHAR (string, slen, start); + stop = start; + i = e - s; + while (string[stop] && i--) + ADVANCE_CHAR (string, slen, stop); + tt = substring (string, start, stop); + return tt; +} +#endif + /* Process a variable substring expansion: ${name:e1[:e2]}. If VARNAME is `@', use the positional parameters; otherwise, use the value of VARNAME. If VARNAME is an array variable, use the array elements. 
*/ @@ -4656,7 +4817,13 @@ parameter_brace_substring (varname, value, substr, quoted) { case VT_VARIABLE: case VT_ARRAYMEMBER: +#if defined (HANDLE_MULTIBYTE) + if (MB_CUR_MAX > 1) + tt = mb_substring (val, e1, e2); + else +#endif tt = substring (val, e1, e2); + if (vtype == VT_VARIABLE) FREE (val); if (quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT)) @@ -5747,26 +5914,7 @@ expand_word_internal (word, quoted, isexp, contains_dollar_at, expanded_somethin #if HANDLE_MULTIBYTE if (MB_CUR_MAX > 1 && string[sindex]) { - int i; - mbstate_t state_bak; - size_t mblength; - - state_bak = state; - mblength = mbrlen (string + sindex, string_size - sindex, &state); - if (MB_INVALIDCH (mblength)) - { - state = state_bak; - mblength = 1; - } - if (mblength < 1) - mblength = 1; - temp = (char *)xmalloc (mblength + 2); - temp[0] = CTLESC; - for (i = 0; i < mblength; i++) - temp[i+1] = string[sindex++]; - temp[mblength + 1] = '\0'; - - goto add_string; + SADD_MBQCHAR_BODY(temp, string, sindex, string_size); } else #endif @@ -6014,7 +6162,9 @@ add_twochars: /* HOWEVER, this fails if the string contains a literal CTLNUL or CTLNUL is contained in the (non-null) expansion of some variable. I'm not sure what to do about this - yet. */ + yet. There has to be some way to indicate the difference + between the two. An auxiliary data structure might be + necessary. */ if (QUOTED_NULL (temp) == 0) remove_quoted_nulls (temp); /* XXX */ #endif @@ -6104,31 +6254,12 @@ add_twochars: else { #if HANDLE_MULTIBYTE - /* XXX - I'd like to use SCOPY_CHAR_I here. 
*/ + if (MB_CUR_MAX > 1) + sindex--; + if (MB_CUR_MAX > 1) { - int i; - mbstate_t state_bak; - size_t mblength; - - sindex--; - state_bak = state; - mblength = mbrlen (string + sindex, string_size - sindex, &state); - if (MB_INVALIDCH (mblength)) - { - state = state_bak; - mblength = 1; - } - if (mblength < 1) - mblength = 1; - - temp = (char *)xmalloc (mblength + 2); - temp[0] = CTLESC; - for (i = 0; i < mblength; i++) - temp[i + 1] = string[sindex++]; - temp[mblength + 1] = '\0'; - - goto add_string; + SADD_MBQCHAR_BODY(temp, string, sindex, string_size); } else #endif |