diff options
-rw-r--r-- | cygwin/cygwin.c | 8 | ||||
-rw-r--r-- | doop.c | 14 | ||||
-rw-r--r-- | op.c | 20 | ||||
-rw-r--r-- | pp_pack.c | 14 | ||||
-rw-r--r-- | regcomp.c | 8 | ||||
-rw-r--r-- | regexec.c | 10 | ||||
-rw-r--r-- | toke.c | 12 | ||||
-rw-r--r-- | utf8.c | 9 |
8 files changed, 46 insertions, 49 deletions
diff --git a/cygwin/cygwin.c b/cygwin/cygwin.c index 87401d1dab..e7be5e3232 100644 --- a/cygwin/cygwin.c +++ b/cygwin/cygwin.c @@ -156,7 +156,7 @@ wide_to_utf8(const wchar_t *wbuf) char *oldlocale = setlocale(LC_CTYPE, NULL); setlocale(LC_CTYPE, "utf-8"); - /* uvuni_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */ + /* uvchr_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */ wlen = wcsrtombs(NULL, (const wchar_t **)&wbuf, wlen, NULL); buf = (char *) safemalloc(wlen+1); wcsrtombs(buf, (const wchar_t **)&wbuf, wlen, NULL); @@ -176,7 +176,7 @@ utf8_to_wide(const char *buf) setlocale(LC_CTYPE, "utf-8"); wbuf = (wchar_t *) safemalloc(wlen); - /* utf8_to_uvuni_buf(pathname, pathname + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ + /* utf8_to_uvchr_buf(pathname, pathname + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ wlen = mbsrtowcs(wbuf, (const char**)&buf, wlen, &mbs); if (oldlocale) setlocale(LC_CTYPE, oldlocale); @@ -283,7 +283,7 @@ XS(XS_Cygwin_win_to_posix_path) mbstate_t mbs; char *oldlocale = setlocale(LC_CTYPE, NULL); setlocale(LC_CTYPE, "utf-8"); - /* utf8_to_uvuni_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ + /* utf8_to_uvchr_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ wlen = mbsrtowcs(wpath, (const char**)&src_path, wlen, &mbs); if (wlen > 0) err = cygwin_conv_path(what, wpath, wbuf, wlen); @@ -370,7 +370,7 @@ XS(XS_Cygwin_posix_to_win_path) setlocale(LC_CTYPE, "utf-8"); if (!IN_BYTES) { mbstate_t mbs; - /* utf8_to_uvuni_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ + /* utf8_to_uvchr_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */ wlen = mbsrtowcs(wpath, (const char**)&src_path, wlen, &mbs); if (wlen > 0) err = cygwin_conv_path(what, wpath, wbuf, wlen); @@ -361,7 +361,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) if (uv < none) { s += UTF8SKIP(s); matches++; - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); } else if (uv == none) { const int i = UTF8SKIP(s); @@ -372,7 +372,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv) else if (uv == extra) { s += UTF8SKIP(s); matches++; - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); } else s += UTF8SKIP(s); @@ -532,7 +532,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) matches++; s += UTF8SKIP(s); if (uv != puv) { - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); puv = uv; } continue; @@ -550,13 +550,13 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) if (havefinal) { s += UTF8SKIP(s); if (puv != final) { - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); puv = final; } } else { STRLEN len; - uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT); + uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); if (uv != puv) { Move(s, d, len, U8); d += len; @@ -585,7 +585,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) if (uv < none) { matches++; s += UTF8SKIP(s); - d = uvuni_to_utf8(d, uv); + d = uvchr_to_utf8(d, uv); continue; } else if (uv == none) { /* "none" is unmapped character */ @@ -598,7 +598,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv) else if (uv == extra && !del) { matches++; s += UTF8SKIP(s); - d = uvuni_to_utf8(d, final); + d = uvchr_to_utf8(d, final); continue; } matches++; /* "none+1" is delete character */ @@ -4129,11 +4129,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) i = 0; transv = newSVpvs(""); while (t < tend) { - cp[2*i] = utf8n_to_uvuni(t, tend-t, &ulen, flags); + cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; if (t < tend && *t == ILLEGAL_UTF8_BYTE) { t++; - cp[2*i+1] = utf8n_to_uvuni(t, tend-t, &ulen, flags); + cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; } else { @@ -4146,11 +4146,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) UV val = cp[2*j]; diff = val - nextmin; if (diff > 0) { - t = uvuni_to_utf8(tmpbuf,nextmin); + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); if (diff > 1) { U8 range_mark = ILLEGAL_UTF8_BYTE; - t = uvuni_to_utf8(tmpbuf, val - 1); + t = uvchr_to_utf8(tmpbuf, val - 1); sv_catpvn(transv, (char *)&range_mark, 1); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); } @@ -4159,13 +4159,13 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (val >= nextmin) nextmin = val + 1; } - t = uvuni_to_utf8(tmpbuf,nextmin); + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); { U8 range_mark = ILLEGAL_UTF8_BYTE; sv_catpvn(transv, (char *)&range_mark, 1); } - t = uvuni_to_utf8(tmpbuf, 0x7fffffff); + t = uvchr_to_utf8(tmpbuf, 0x7fffffff); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); t = (const U8*)SvPVX_const(transv); tlen = SvCUR(transv); @@ -4186,11 +4186,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { - tfirst = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags); + tfirst = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags); t += ulen; if (t < tend && *t == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */ t++; - tlast = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags); + tlast = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags); t += ulen; } else @@ -4200,11 +4200,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) /* now see if we need more "r" chars */ if (rfirst > rlast) { if (r < rend) { - rfirst = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags); + rfirst = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags); r += ulen; if (r < rend && *r == ILLEGAL_UTF8_BYTE) { /* illegal utf8 val indicates range */ r++; - rlast = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags); + rlast = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags); r += ulen; } else @@ -319,7 +319,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { if (ptr >= end) break; - utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags); + utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags); } if (from > end) from = end; } @@ -1316,10 +1316,10 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c len = UTF8SKIP(result); if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) &result[1], len-1, 'U')) break; - auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT); + auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT); s = ptr; } else { - auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT); + auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT); if (retlen == (STRLEN) -1 || retlen == 0) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; @@ -2585,8 +2585,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvuni_to_utf8_flags((U8 *) cur, - NATIVE_TO_UNI(auv), + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, + auv, warn_utf8 ? 0 : UNICODE_ALLOW_ANY); } else { @@ -2639,7 +2639,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) auv = SvUV(fromstr); if (utf8) { U8 buffer[UTF8_MAXLEN], *endb; - endb = uvuni_to_utf8_flags(buffer, auv, + endb = uvchr_to_utf8_flags(buffer, auv, warn_utf8 ? 0 : UNICODE_ALLOW_ANY); if (cur+(endb-buffer)*UTF8_EXPAND >= end) { @@ -2657,7 +2657,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv, + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, warn_utf8 ? 0 : UNICODE_ALLOW_ANY); } @@ -1443,7 +1443,7 @@ and would end up looking like: 8: EXACT <baz>(10) 10: END(0) - d = uvuni_to_utf8_flags(d, uv, 0); + d = uvchr_to_utf8_flags(d, uv, 0); is the recommended Unicode-aware way of saying @@ -1455,7 +1455,7 @@ is the recommended Unicode-aware way of saying if (UTF) { \ SV *zlopp = newSV(7); /* XXX: optimize me */ \ unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \ - unsigned const char *const kapow = uvuni_to_utf8(flrbbbbb, val); \ + unsigned const char *const kapow = uvchr_to_utf8(flrbbbbb, val); \ SvCUR_set(zlopp, kapow - flrbbbbb); \ SvPOK_on(zlopp); \ SvUTF8_on(zlopp); \ @@ -1470,12 +1470,12 @@ is the recommended Unicode-aware way of saying wordlen++; \ if ( UTF ) { \ /* if it is UTF then it is either already folded, or does not need folding */ \ - uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags); \ + uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags); \ } \ else if (folder == PL_fold_latin1) { \ /* if we use this folder we have to obey unicode rules on latin-1 data */ \ if ( foldlen > 0 ) { \ - uvc = utf8n_to_uvuni( (const U8*) scan, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) scan, UTF8_MAXLEN, &len, uniflags ); \ foldlen -= len; \ scan += len; \ len = 0; \ @@ -1238,7 +1238,7 @@ STMT_START { \ switch (trie_type) { \ case trie_utf8_fold: \ if ( foldlen>0 ) { \ - uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ foldlen -= len; \ uscan += len; \ len=0; \ @@ -1252,7 +1252,7 @@ STMT_START { \ break; \ case trie_latin_utf8_fold: \ if ( foldlen>0 ) { \ - uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ foldlen -= len; \ uscan += len; \ len=0; \ @@ -1265,7 +1265,7 @@ STMT_START { \ } \ break; \ case trie_utf8: \ - uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \ + uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \ break; \ case trie_plain: \ uvc = (UV)*uc; \ @@ -4050,7 +4050,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) while (chars) { if (utf8_target) { - uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len, + uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len, uniflags); uc += len; } @@ -4063,7 +4063,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) while (foldlen) { if (!--chars) break; - uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len, + uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len, uniflags); uscan += len; foldlen -= len; @@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) ENTER; SAVESPTR(PL_warnhook); PL_warnhook = PERL_WARNHOOK_FATAL; - utf8n_to_uvuni((U8*)p, e-p, NULL, 0); + utf8n_to_uvchr((U8*)p, e-p, NULL, 0); LEAVE; } } @@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags) bufend = PL_parser->bufend; } } - unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); + unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); if (retlen == (STRLEN)-1) { /* malformed UTF-8 */ ENTER; SAVESPTR(PL_warnhook); PL_warnhook = PERL_WARNHOOK_FATAL; - utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0); + utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); LEAVE; } return unichar; @@ -2761,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) { /* If warnings are on, this will print a more detailed analysis of what * is wrong than the error message below */ - utf8n_to_uvuni(first_bad_char_loc, + utf8n_to_uvchr(first_bad_char_loc, e - ((char *) first_bad_char_loc), NULL, 0); @@ -2903,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { /* If warnings are on, this will print a more detailed analysis of * what is wrong than the error message below */ - utf8n_to_uvuni(first_bad_char_loc, + utf8n_to_uvchr(first_bad_char_loc, (char *) first_bad_char_loc - str, NULL, 0); @@ -3442,7 +3442,7 @@ S_scan_const(pTHX_ char *start) } if (has_utf8) { - d = (char*)uvuni_to_utf8((U8*)d, uv); + d = (char*)uvchr_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { PL_sublex_info.sub_op->op_private |= @@ -2422,7 +2422,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, s = SvPV_const(*svp, len); if (len == 1) /* EIGHTBIT */ - len = uvuni_to_utf8(ustrp, NATIVE_TO_UNI(*(U8*)s)) - ustrp; + len = uvchr_to_utf8(ustrp, *(U8*)s) - ustrp; else { Copy(s, ustrp, len, U8); } @@ -3216,10 +3216,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8) /* If not cached, generate it via swatch_get */ if (!svp || !SvPOK(*svp) || !(tmps = (const U8*)SvPV_const(*svp, slen))) { - /* We use utf8n_to_uvuni() as we want an index into - Unicode tables, not a native character number. - */ - const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, + const UV code_point = utf8n_to_uvchr(ptr, UTF8_MAXBYTES, 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); swatch = swatch_get(swash, @@ -3904,7 +3901,7 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash) /* The key is the inverse mapping */ char key[UTF8_MAXBYTES+1]; - char* key_end = (char *) uvuni_to_utf8((U8*) key, val); + char* key_end = (char *) uvchr_to_utf8((U8*) key, val); STRLEN key_len = key_end - key; /* Get the list for the map */ |