summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xembed.pl6
-rw-r--r--pod/perlapi.pod81
-rw-r--r--pod/perlunicode.pod13
-rw-r--r--utf8.c110
4 files changed, 152 insertions, 58 deletions
diff --git a/embed.pl b/embed.pl
index 383c305e3d..639ba6cf9e 100755
--- a/embed.pl
+++ b/embed.pl
@@ -1333,7 +1333,7 @@ Apd |HE* |hv_store_ent |HV* tb|SV* key|SV* val|U32 hash
Apd |void |hv_undef |HV* tb
Ap |I32 |ibcmp |const char* a|const char* b|I32 len
Ap |I32 |ibcmp_locale |const char* a|const char* b|I32 len
-Ap |I32 |ibcmp_utf8 |const char* a|bool ua|const char* b|bool ub|I32 len
+Apd |I32 |ibcmp_utf8 |const char* a|bool ua|const char* b|bool ub|I32 len
p |bool |ingroup |Gid_t testgid|Uid_t effective
p |void |init_argv_symbols|int|char **
p |void |init_debugger
@@ -1851,9 +1851,9 @@ Adp |UV |utf8n_to_uvchr |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
Adp |UV |utf8n_to_uvuni |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
Apd |U8* |uvchr_to_utf8 |U8 *d|UV uv
Apd |U8* |uvuni_to_utf8 |U8 *d|UV uv
-Ap |char* |pv_uni_display |SV *dsv|U8 *spv|STRLEN len \
+Apd |char* |pv_uni_display |SV *dsv|U8 *spv|STRLEN len \
|STRLEN pvlim|UV flags
-Ap |char* |sv_uni_display |SV *dsv|SV *ssv|STRLEN pvlim|UV flags
+Apd |char* |sv_uni_display |SV *dsv|SV *ssv|STRLEN pvlim|UV flags
p |void |vivify_defelem |SV* sv
p |void |vivify_ref |SV* sv|U32 to_what
p |I32 |wait4pid |Pid_t pid|int* statusp|int flags
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 2ca1b2154d..6ac32f4014 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -1108,6 +1108,23 @@ Undefines the hash.
=for hackers
Found in file hv.c
+=item ibcmp_utf8
+
+Return true if the strings s1 and s2 differ case-insensitively, false
+if not (if they are equal case-insensitively). If u1 is true, the
+string s1 is assumed to be in UTF-8-encoded Unicode. If u2 is true,
+the string s2 is assumed to be in UTF-8-encoded Unicode. (If both u1
+and u2 are false, ibcmp() is called.)
+
+For case-insensitiveness, the "casefolding" of Unicode is used
+instead of upper/lowercasing both the characters, see
+http://www.unicode.org/unicode/reports/tr21/ (Case Mappings).
+
+ I32 ibcmp_utf8(const char* a, bool ua, const char* b, bool ub, I32 len)
+
+=for hackers
+Found in file utf8.c
+
=item isALNUM
Returns a boolean indicating whether the C C<char> is an ASCII alphanumeric
@@ -1404,6 +1421,17 @@ SV is B<not> incremented.
=for hackers
Found in file sv.c
+=item newSV
+
+Create a new null SV, or if len > 0, create a new empty SVt_PV type SV
+with an initial PV allocation of len+1. Normally accessed via the C<NEWSV>
+macro.
+
+ SV* newSV(STRLEN len)
+
+=for hackers
+Found in file sv.c
+
=item NEWSV
Creates a new SV. A non-zero C<len> parameter indicates the number of
@@ -1417,17 +1445,6 @@ C<id> is an integer id between 0 and 1299 (used to identify leaks).
=for hackers
Found in file handy.h
-=item newSV
-
-Create a new null SV, or if len > 0, create a new empty SVt_PV type SV
-with an initial PV allocation of len+1. Normally accessed via the C<NEWSV>
-macro.
-
- SV* newSV(STRLEN len)
-
-=for hackers
-Found in file sv.c
-
=item newSViv
Creates a new SV and copies an integer into it. The reference count for the
@@ -1867,6 +1884,19 @@ See C<PUSHMARK> and L<perlcall> for other uses.
=for hackers
Found in file pp.h
+=item pv_uni_display
+
+Build to the scalar dsv a displayable version of the string spv,
+length len, the displayable version being at most pvlim bytes long
+(if longer, the rest is truncated and "..." will be appended).
+The flags argument is currently unused but available for future extensions.
+The pointer to the PV of the dsv is returned.
+
+ char* pv_uni_display(SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags)
+
+=for hackers
+Found in file utf8.c
+
=item Renew
The XSUB-writer's interface to the C C<realloc> function.
@@ -2299,22 +2329,22 @@ version which guarantees to evaluate sv only once.
=for hackers
Found in file sv.h
-=item SvIVX
+=item SvIVx
-Returns the raw value in the SV's IV slot, without checks or conversions.
-Only use when you are sure SvIOK is true. See also C<SvIV()>.
+Coerces the given SV to an integer and returns it. Guarantees to evaluate
+sv only once. Use the more efficient C<SvIV> otherwise.
- IV SvIVX(SV* sv)
+ IV SvIVx(SV* sv)
=for hackers
Found in file sv.h
-=item SvIVx
+=item SvIVX
-Coerces the given SV to an integer and returns it. Guarantees to evaluate
-sv only once. Use the more efficient C<SvIV> otherwise.
+Returns the raw value in the SV's IV slot, without checks or conversions.
+Only use when you are sure SvIOK is true. See also C<SvIV()>.
- IV SvIVx(SV* sv)
+ IV SvIVX(SV* sv)
=for hackers
Found in file sv.h
@@ -4035,6 +4065,19 @@ instead use an in-line version.
=for hackers
Found in file sv.c
+=item sv_uni_display
+
+Build to the scalar dsv a displayable version of the scalar ssv,
+the displayable version being at most pvlim bytes long
+(if longer, the rest is truncated and "..." will be appended).
+The flags argument is currently unused but available for future extensions.
+The pointer to the PV of the dsv is returned.
+
+ char* sv_uni_display(SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
+
+=for hackers
+Found in file utf8.c
+
=item sv_unmagic
Removes all magic of type C<type> from an SV.
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index e8a5fff5c4..b1ffed534f 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -876,6 +876,19 @@ utf8_hop(s, off) will return a pointer to an UTF-8 encoded buffer that
is C<off> (positive or negative) Unicode characters displaced from the
UTF-8 buffer C<s>.
+=item *
+
+pv_uni_display(dsv, spv, len, pvlim, flags) and sv_uni_display(dsv,
+ssv, pvlim, flags) are useful for debug output of Unicode strings and
+scalars (only for debug: they display B<all> characters as hexadecimal
+code points).
+
+=item *
+
+ibcmp_utf8(s1, u1, s2, u2, len) can be used to compare two strings
+case-insensitively in Unicode. (For case-sensitive comparisons you
+can just use memEQ() and memNE() as usual.)
+
=back
For more information, see L<perlapi>, and F<utf8.c> and F<utf8.h>
diff --git a/utf8.c b/utf8.c
index 7da1e5bc69..30a4908e64 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1524,6 +1524,16 @@ Perl_utf8n_to_uvchr(pTHX_ U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
return UNI_TO_NATIVE(uv);
}
+/*
+=for apidoc A|char *|pv_uni_display|SV *dsv|U8 *spv|STRLEN len|STRLEN pvlim|UV flags
+
+Build to the scalar dsv a displayable version of the string spv,
+length len, the displayable version being at most pvlim bytes long
+(if longer, the rest is truncated and "..." will be appended).
+The flags argument is currently unused but available for future extensions.
+The pointer to the PV of the dsv is returned.
+
+=cut */
char *
Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags)
{
@@ -1546,6 +1556,16 @@ Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags)
return SvPVX(dsv);
}
+/*
+=for apidoc A|char *|sv_uni_display|SV *dsv|SV *ssv|STRLEN pvlim|UV flags
+
+Build to the scalar dsv a displayable version of the scalar ssv,
+the displayable version being at most pvlim bytes long
+(if longer, the rest is truncated and "..." will be appended).
+The flags argument is currently unused but available for future extensions.
+The pointer to the PV of the dsv is returned.
+
+=cut */
char *
Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
{
@@ -1553,47 +1573,65 @@ Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
pvlim, flags);
}
+/*
+=for apidoc A|I32|ibcmp_utf8|const char *s1|bool u1|const char *s2|bool u2|register I32 len
+
+Return true if the strings s1 and s2 differ case-insensitively, false
+if not (if they are equal case-insensitively). If u1 is true, the
+string s1 is assumed to be in UTF-8-encoded Unicode. If u2 is true,
+the string s2 is assumed to be in UTF-8-encoded Unicode. (If both u1
+and u2 are false, ibcmp() is called.)
+
+For case-insensitiveness, the "casefolding" of Unicode is used
+instead of upper/lowercasing both the characters, see
+http://www.unicode.org/unicode/reports/tr21/ (Case Mappings).
+
+=cut */
I32
Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, const char *s2, bool u2, register I32 len)
{
- register U8 *a = (U8*)s1;
- register U8 *b = (U8*)s2;
- STRLEN la, lb;
- UV ca, cb;
- STRLEN ulen1, ulen2;
- U8 tmpbuf1[UTF8_MAXLEN*3+1];
- U8 tmpbuf2[UTF8_MAXLEN*3+1];
-
- while (len) {
- if (u1)
- ca = utf8_to_uvchr((U8*)a, &la);
- else {
- ca = *a;
- la = 1;
- }
- if (u2)
- cb = utf8_to_uvchr((U8*)b, &lb);
- else {
- cb = *b;
- lb = 1;
- }
- if (ca != cb) {
+ if (u1 || u2) {
+ register U8 *a = (U8*)s1;
+ register U8 *b = (U8*)s2;
+ STRLEN la, lb;
+ UV ca, cb;
+ STRLEN ulen1, ulen2;
+ U8 tmpbuf1[UTF8_MAXLEN*3+1];
+ U8 tmpbuf2[UTF8_MAXLEN*3+1];
+
+ while (len) {
if (u1)
- to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
- else
- ulen1 = 1;
+ ca = utf8_to_uvchr((U8*)a, &la);
+ else {
+ ca = *a;
+ la = 1;
+ }
if (u2)
- to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
- else
- ulen2 = 1;
- if (ulen1 != ulen2
- || (ulen1 == 1 && PL_fold[ca] != PL_fold[cb])
- || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
- return 1;
+ cb = utf8_to_uvchr((U8*)b, &lb);
+ else {
+ cb = *b;
+ lb = 1;
+ }
+ if (ca != cb) {
+ if (u1)
+ to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
+ else
+ ulen1 = 1;
+ if (u2)
+ to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
+ else
+ ulen2 = 1;
+ if (ulen1 != ulen2
+ || (ulen1 == 1 && PL_fold[ca] != PL_fold[cb])
+ || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
+ return 1;
+ }
+ a += la;
+ b += lb;
}
- a += la;
- b += lb;
- }
- return 0;
+ return 0;
+ }
+ else
+ return ibcmp(s1, s2);
}