summary | refs | log | tree | commit | diff
path: root/dist/Unicode-Normalize
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2022-04-19 12:11:48 -0600
committer: Karl Williamson <khw@cpan.org> 2022-04-19 21:25:47 -0600
commit: c2e0d3bb9f0e8d6999bfa6c5d969f505012bcf72 (patch)
tree: e72e453378a3444639314718e05bfc028bfe4808 /dist/Unicode-Normalize
parent: 9b91675263362c97438a0aa1b4dfa1d2573d1536 (diff)
download: perl-c2e0d3bb9f0e8d6999bfa6c5d969f505012bcf72.tar.gz
Normalize: Use ppport.h
This commit changes Unicode::Normalize to use ppport.h. All modules that convert from UTF-8 should use the latest ppport.h so that they get the latest security checking available, which ppport.h backports as far back as Perl 5.6.1. The module now decodes UTF-8 with utf8_to_uvchr_buf() instead of utf8n_to_uvchr(), which takes a flags parameter specifying which code points not to accept. Since the flags value the module was passing was zero, the old call was equivalent to utf8_to_uvchr_buf().
Diffstat (limited to 'dist/Unicode-Normalize')
-rw-r--r--  dist/Unicode-Normalize/Normalize.pm   2
-rw-r--r--  dist/Unicode-Normalize/Normalize.xs  53
2 files changed, 14 insertions, 41 deletions
diff --git a/dist/Unicode-Normalize/Normalize.pm b/dist/Unicode-Normalize/Normalize.pm
index 754fadb5cc..b67a293846 100644
--- a/dist/Unicode-Normalize/Normalize.pm
+++ b/dist/Unicode-Normalize/Normalize.pm
@@ -7,7 +7,7 @@ use Carp;
no warnings 'utf8';
-our $VERSION = '1.30';
+our $VERSION = '1.31';
our $PACKAGE = __PACKAGE__;
our @EXPORT = qw( NFC NFD NFKC NFKD );
diff --git a/dist/Unicode-Normalize/Normalize.xs b/dist/Unicode-Normalize/Normalize.xs
index a132358b64..210917eac7 100644
--- a/dist/Unicode-Normalize/Normalize.xs
+++ b/dist/Unicode-Normalize/Normalize.xs
@@ -13,6 +13,9 @@
#include "perl.h"
#include "XSUB.h"
+#define NEED_utf8_to_uvchr_buf
+#include "ppport.h"
+
/* These 5 files are prepared by mkheader */
#include "unfcmb.h"
#include "unfcan.h"
@@ -30,38 +33,8 @@
# else /* Perl 5.6.1 */
# define uvchr_to_utf8 uv_to_utf8
# endif
-
-# undef utf8n_to_uvchr
-# ifdef utf8n_to_uvuni
-# define utf8n_to_uvchr utf8n_to_uvuni
-# else /* Perl 5.6.1 */
-# define utf8n_to_uvchr utf8_to_uv
-# endif
#endif
-/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
-#ifndef UTF8_ALLOW_BOM
-#define UTF8_ALLOW_BOM (0)
-#endif /* UTF8_ALLOW_BOM */
-
-#ifndef UTF8_ALLOW_SURROGATE
-#define UTF8_ALLOW_SURROGATE (0)
-#endif /* UTF8_ALLOW_SURROGATE */
-
-#ifndef UTF8_ALLOW_FE_FF
-#define UTF8_ALLOW_FE_FF (0)
-#endif /* UTF8_ALLOW_FE_FF */
-
-#ifndef UTF8_ALLOW_FFFF
-#define UTF8_ALLOW_FFFF (0)
-#endif /* UTF8_ALLOW_FFFF */
-
-#ifndef PERL_UNUSED_VAR
-# define PERL_UNUSED_VAR(x) ((void)sizeof(x))
-#endif
-
-#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
-
/* check if the string buffer is enough before uvchr_to_utf8(). */
/* dstart, d, and dlen should be defined outside before. */
#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \
@@ -71,7 +44,7 @@
d = dstart + curlen; \
}
-/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */
+/* if utf8_to_uvchr_buf() sets retlen to 0 (if broken?) */
#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character"
/* utf8_hop() hops back before start. Maybe broken UTF-8 */
@@ -244,7 +217,7 @@ U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscom
while (p < e) {
STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "decompose");
p += retlen;
@@ -289,7 +262,7 @@ U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen)
while (p < e) {
U8 curCC;
STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "reorder");
p += retlen;
@@ -366,7 +339,7 @@ U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool isconti
while (p < e) {
U8 curCC;
STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "compose");
p += retlen;
@@ -636,7 +609,7 @@ checkNFD(src)
preCC = 0;
for (p = s; p < e; p += retlen) {
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "checkNFD or -NFKD");
@@ -673,7 +646,7 @@ checkNFC(src)
preCC = 0;
for (p = s; p < e; p += retlen) {
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "checkNFC or -NFKC");
@@ -731,7 +704,7 @@ checkFCD(src)
U8 *sCan;
UV uvLead;
STRLEN canlen = 0;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ UV uv = utf8_to_uvchr_buf(p, e, &retlen);
if (!retlen)
croak(ErrRetlenIsZero, "checkFCD or -FCC");
@@ -740,7 +713,7 @@ checkFCD(src)
if (sCan) {
STRLEN canret;
canlen = (STRLEN)strlen((char *) sCan);
- uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF);
+ uvLead = utf8_to_uvchr_buf(sCan, sCan + canlen, &canret);
if (!canret)
croak(ErrRetlenIsZero, "checkFCD or -FCC");
}
@@ -771,7 +744,7 @@ checkFCD(src)
U8* pCan = utf8_hop(eCan, -1);
if (pCan < sCan)
croak(ErrHopBeforeStart);
- uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF);
+ uvTrail = utf8_to_uvchr_buf(pCan, eCan, &canret);
if (!canret)
croak(ErrRetlenIsZero, "checkFCD or -FCC");
preCC = getCombinClass(uvTrail);
@@ -910,7 +883,7 @@ splitOnLastStarter(src)
p = utf8_hop(p, -1);
if (p < s)
croak(ErrHopBeforeStart);
- uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF);
+ uv = utf8_to_uvchr_buf(p, e, NULL);
if (getCombinClass(uv) == 0) /* Last Starter found */
break;
}