diff options
author | Karl Williamson <khw@cpan.org> | 2022-04-19 12:11:48 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-04-19 21:25:47 -0600 |
commit | c2e0d3bb9f0e8d6999bfa6c5d969f505012bcf72 (patch) | |
tree | e72e453378a3444639314718e05bfc028bfe4808 /dist/Unicode-Normalize | |
parent | 9b91675263362c97438a0aa1b4dfa1d2573d1536 (diff) | |
download | perl-c2e0d3bb9f0e8d6999bfa6c5d969f505012bcf72.tar.gz |
Normalize: Use ppport.h
This commit changes Unicode::Normalize to use ppport.h.
All modules that convert from UTF-8 should be using the latest ppport.h,
so that they get the latest security checking available, which ppport.h
backports to Perl 5.6.1.
This commit converts the module to use utf8_to_uvchr_buf() to translate
from UTF-8, instead of utf8n_to_uvchr(), which takes a flags parameter
specifying which code points not to accept. Since the flags value the
module was passing was zero, this is equivalent to utf8_to_uvchr_buf().
Diffstat (limited to 'dist/Unicode-Normalize')
-rw-r--r-- | dist/Unicode-Normalize/Normalize.pm | 2 | ||||
-rw-r--r-- | dist/Unicode-Normalize/Normalize.xs | 53 |
2 files changed, 14 insertions, 41 deletions
diff --git a/dist/Unicode-Normalize/Normalize.pm b/dist/Unicode-Normalize/Normalize.pm index 754fadb5cc..b67a293846 100644 --- a/dist/Unicode-Normalize/Normalize.pm +++ b/dist/Unicode-Normalize/Normalize.pm @@ -7,7 +7,7 @@ use Carp; no warnings 'utf8'; -our $VERSION = '1.30'; +our $VERSION = '1.31'; our $PACKAGE = __PACKAGE__; our @EXPORT = qw( NFC NFD NFKC NFKD ); diff --git a/dist/Unicode-Normalize/Normalize.xs b/dist/Unicode-Normalize/Normalize.xs index a132358b64..210917eac7 100644 --- a/dist/Unicode-Normalize/Normalize.xs +++ b/dist/Unicode-Normalize/Normalize.xs @@ -13,6 +13,9 @@ #include "perl.h" #include "XSUB.h" +#define NEED_utf8_to_uvchr_buf +#include "ppport.h" + /* These 5 files are prepared by mkheader */ #include "unfcmb.h" #include "unfcan.h" @@ -30,38 +33,8 @@ # else /* Perl 5.6.1 */ # define uvchr_to_utf8 uv_to_utf8 # endif - -# undef utf8n_to_uvchr -# ifdef utf8n_to_uvuni -# define utf8n_to_uvchr utf8n_to_uvuni -# else /* Perl 5.6.1 */ -# define utf8n_to_uvchr utf8_to_uv -# endif #endif -/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ -#ifndef UTF8_ALLOW_BOM -#define UTF8_ALLOW_BOM (0) -#endif /* UTF8_ALLOW_BOM */ - -#ifndef UTF8_ALLOW_SURROGATE -#define UTF8_ALLOW_SURROGATE (0) -#endif /* UTF8_ALLOW_SURROGATE */ - -#ifndef UTF8_ALLOW_FE_FF -#define UTF8_ALLOW_FE_FF (0) -#endif /* UTF8_ALLOW_FE_FF */ - -#ifndef UTF8_ALLOW_FFFF -#define UTF8_ALLOW_FFFF (0) -#endif /* UTF8_ALLOW_FFFF */ - -#ifndef PERL_UNUSED_VAR -# define PERL_UNUSED_VAR(x) ((void)sizeof(x)) -#endif - -#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) - /* check if the string buffer is enough before uvchr_to_utf8(). */ /* dstart, d, and dlen should be defined outside before. */ #define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \ @@ -71,7 +44,7 @@ d = dstart + curlen; \ } -/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */ +/* if utf8_to_uvchr_buf() sets retlen to 0 (if broken?) 
*/ #define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character" /* utf8_hop() hops back before start. Maybe broken UTF-8 */ @@ -244,7 +217,7 @@ U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscom while (p < e) { STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "decompose"); p += retlen; @@ -289,7 +262,7 @@ U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen) while (p < e) { U8 curCC; STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "reorder"); p += retlen; @@ -366,7 +339,7 @@ U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool isconti while (p < e) { U8 curCC; STRLEN retlen; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "compose"); p += retlen; @@ -636,7 +609,7 @@ checkNFD(src) preCC = 0; for (p = s; p < e; p += retlen) { - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "checkNFD or -NFKD"); @@ -673,7 +646,7 @@ checkNFC(src) preCC = 0; for (p = s; p < e; p += retlen) { - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "checkNFC or -NFKC"); @@ -731,7 +704,7 @@ checkFCD(src) U8 *sCan; UV uvLead; STRLEN canlen = 0; - UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); + UV uv = utf8_to_uvchr_buf(p, e, &retlen); if (!retlen) croak(ErrRetlenIsZero, "checkFCD or -FCC"); @@ -740,7 +713,7 @@ checkFCD(src) if (sCan) { STRLEN canret; canlen = (STRLEN)strlen((char *) sCan); - uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF); + uvLead = utf8_to_uvchr_buf(sCan, sCan + canlen, &canret); if (!canret) 
croak(ErrRetlenIsZero, "checkFCD or -FCC"); } @@ -771,7 +744,7 @@ checkFCD(src) U8* pCan = utf8_hop(eCan, -1); if (pCan < sCan) croak(ErrHopBeforeStart); - uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF); + uvTrail = utf8_to_uvchr_buf(pCan, eCan, &canret); if (!canret) croak(ErrRetlenIsZero, "checkFCD or -FCC"); preCC = getCombinClass(uvTrail); @@ -910,7 +883,7 @@ splitOnLastStarter(src) p = utf8_hop(p, -1); if (p < s) croak(ErrHopBeforeStart); - uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF); + uv = utf8_to_uvchr_buf(p, e, NULL); if (getCombinClass(uv) == 0) /* Last Starter found */ break; } |