diff options
-rw-r--r-- | embed.h | 4 | ||||
-rwxr-xr-x | embed.pl | 3 | ||||
-rw-r--r-- | ext/DynaLoader/DynaLoader_pm.PL | 6 | ||||
-rw-r--r-- | gv.c | 2 | ||||
-rw-r--r-- | lib/IPC/Open3.pm | 27 | ||||
-rw-r--r-- | objXSUB.h | 4 | ||||
-rw-r--r-- | perlapi.c | 9 | ||||
-rw-r--r-- | pod/perlapi.pod | 30 | ||||
-rw-r--r-- | pod/perlfunc.pod | 15 | ||||
-rw-r--r-- | proto.h | 3 | ||||
-rw-r--r-- | sv.c | 2 | ||||
-rw-r--r-- | t/lib/peek.t | 2 | ||||
-rw-r--r-- | utf8.c | 41 |
13 files changed, 113 insertions, 35 deletions
@@ -305,6 +305,7 @@ #define to_uni_title_lc Perl_to_uni_title_lc #define to_uni_lower_lc Perl_to_uni_lower_lc #define is_utf8_char Perl_is_utf8_char +#define is_utf8_string Perl_is_utf8_string #define is_utf8_alnum Perl_is_utf8_alnum #define is_utf8_alnumc Perl_is_utf8_alnumc #define is_utf8_idfirst Perl_is_utf8_idfirst @@ -1759,6 +1760,7 @@ #define to_uni_title_lc(a) Perl_to_uni_title_lc(aTHX_ a) #define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a) #define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a) +#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b) #define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a) #define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a) #define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a) @@ -3447,6 +3449,8 @@ #define to_uni_lower_lc Perl_to_uni_lower_lc #define Perl_is_utf8_char CPerlObj::Perl_is_utf8_char #define is_utf8_char Perl_is_utf8_char +#define Perl_is_utf8_string CPerlObj::Perl_is_utf8_string +#define is_utf8_string Perl_is_utf8_string #define Perl_is_utf8_alnum CPerlObj::Perl_is_utf8_alnum #define is_utf8_alnum Perl_is_utf8_alnum #define Perl_is_utf8_alnumc CPerlObj::Perl_is_utf8_alnumc @@ -1620,6 +1620,7 @@ Ap |U32 |to_uni_upper_lc|U32 c Ap |U32 |to_uni_title_lc|U32 c Ap |U32 |to_uni_lower_lc|U32 c Ap |int |is_utf8_char |U8 *p +Ap |bool |is_utf8_string |U8 *s|STRLEN len Ap |bool |is_utf8_alnum |U8 *p Ap |bool |is_utf8_alnumc |U8 *p Ap |bool |is_utf8_idfirst|U8 *p @@ -2063,7 +2064,7 @@ Ap |U8* |utf16_to_utf8_reversed|U16* p|U8 *d|I32 bytelen Ap |I32 |utf8_distance |U8 *a|U8 *b Ap |U8* |utf8_hop |U8 *s|I32 off Ap |U8* |utf8_to_bytes |U8 *s|STRLEN len -Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN len +Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN *len Ap |UV |utf8_to_uv |U8 *s|I32* retlen Ap |U8* |uv_to_utf8 |U8 *d|UV uv p |void |vivify_defelem |SV* sv diff --git a/ext/DynaLoader/DynaLoader_pm.PL b/ext/DynaLoader/DynaLoader_pm.PL index 55b8eca727..b7b45d8372 100644 --- a/ext/DynaLoader/DynaLoader_pm.PL +++ b/ext/DynaLoader/DynaLoader_pm.PL @@ -21,7 +21,7 @@ package DynaLoader; # feast like to keep their secret; for wonder makes the words of # praise louder.' -# (Quote from Tolkien sugested by Anno Siegel.) +# (Quote from Tolkien suggested by Anno Siegel.) # # See pod text at end of file for documentation. # See also ext/DynaLoader/README in source tree for other information. @@ -170,8 +170,8 @@ sub bootstrap { print STDERR "DynaLoader::bootstrap for $module ", ($Is_MacOS - ? "(auto/$modpname/$modfname.$dl_dlext)\n" : - "(:auto:$modpname:$modfname.$dl_dlext)\n") + ? "(:auto:$modpname:$modfname.$dl_dlext)\n" : + "(auto/$modpname/$modfname.$dl_dlext)\n") if $dl_debug; foreach (@INC) { @@ -106,7 +106,7 @@ Perl_gv_init(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, int multi) GvFILE(gv) = CopFILE(PL_curcop) ? CopFILE(PL_curcop) : ""; GvCVGEN(gv) = 0; GvEGV(gv) = gv; - sv_magic((SV*)gv, (SV*)gv, '*', name, len); + sv_magic((SV*)gv, (SV*)gv, '*', Nullch, 0); GvSTASH(gv) = (HV*)SvREFCNT_inc(stash); GvNAME(gv) = savepvn(name, len); GvNAMELEN(gv) = len; diff --git a/lib/IPC/Open3.pm b/lib/IPC/Open3.pm index 46ebd68cef..6d91c81381 100644 --- a/lib/IPC/Open3.pm +++ b/lib/IPC/Open3.pm @@ -84,6 +84,7 @@ The order of arguments differs from that of open2(). # fixed for 5.001 by Ulrich Kunitz <kunitz@mai-koeln.com> # ported to Win32 by Ron Schmidt, Merrill Lynch almost ended my career # fixed for autovivving FHs, tchrist again +# allow fd numbers to be used, by Frank Tobin # # $Id: open3.pl,v 1.1 1993/11/23 06:26:15 marc Exp $ # @@ -136,6 +137,16 @@ sub xclose { close $_[0] or croak "$Me: close($_[0]) failed: $!"; } +sub xfileno { + my ($fh) = @_; + return $1 if $fh =~ /^=?(\d+)$/; # deal with $fh just being an fd + return fileno $fh; +} + +sub fh_is_fd { + return $_[0] =~ /^=?\d+$/; +} + my $do_spawn = $^O eq 'os2' || $^O eq 'MSWin32'; sub _open3 { @@ -164,9 +175,9 @@ sub _open3 { $dup_err = ($dad_err =~ s/^[<>]&//); # force unqualified filehandles into caller's package - $dad_wtr = qualify $dad_wtr, $package; - $dad_rdr = qualify $dad_rdr, $package; - $dad_err = qualify $dad_err, $package; + $dad_wtr = qualify $dad_wtr, $package unless fh_is_fd($dad_wtr); + $dad_rdr = qualify $dad_rdr, $package unless fh_is_fd($dad_rdr); + $dad_err = qualify $dad_err, $package unless fh_is_fd($dad_err); my $kid_rdr = gensym; my $kid_wtr = gensym; @@ -181,20 +192,20 @@ sub _open3 { # If she wants to dup the kid's stderr onto her stdout I need to # save a copy of her stdout before I put something else there. if ($dad_rdr ne $dad_err && $dup_err - && fileno($dad_err) == fileno(STDOUT)) { + && xfileno($dad_err) == fileno(STDOUT)) { my $tmp = gensym; xopen($tmp, ">&$dad_err"); $dad_err = $tmp; } if ($dup_wtr) { - xopen \*STDIN, "<&$dad_wtr" if fileno(STDIN) != fileno($dad_wtr); + xopen \*STDIN, "<&$dad_wtr" if fileno(STDIN) != xfileno($dad_wtr); } else { xclose $dad_wtr; xopen \*STDIN, "<&=" . fileno $kid_rdr; } if ($dup_rdr) { - xopen \*STDOUT, ">&$dad_rdr" if fileno(STDOUT) != fileno($dad_rdr); + xopen \*STDOUT, ">&$dad_rdr" if fileno(STDOUT) != xfileno($dad_rdr); } else { xclose $dad_rdr; xopen \*STDOUT, ">&=" . fileno $kid_wtr; @@ -204,8 +215,8 @@ sub _open3 { # I have to use a fileno here because in this one case # I'm doing a dup but the filehandle might be a reference # (from the special case above). - xopen \*STDERR, ">&" . fileno $dad_err - if fileno(STDERR) != fileno($dad_err); + xopen \*STDERR, ">&" . xfileno($dad_err) + if fileno(STDERR) != xfileno($dad_err); } else { xclose $dad_err; xopen \*STDERR, ">&=" . fileno $kid_err; @@ -707,6 +707,10 @@ #define Perl_is_utf8_char pPerl->Perl_is_utf8_char #undef is_utf8_char #define is_utf8_char Perl_is_utf8_char +#undef Perl_is_utf8_string +#define Perl_is_utf8_string pPerl->Perl_is_utf8_string +#undef is_utf8_string +#define is_utf8_string Perl_is_utf8_string #undef Perl_is_utf8_alnum #define Perl_is_utf8_alnum pPerl->Perl_is_utf8_alnum #undef is_utf8_alnum @@ -1326,6 +1326,13 @@ Perl_is_utf8_char(pTHXo_ U8 *p) return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p); } +#undef Perl_is_utf8_string +bool +Perl_is_utf8_string(pTHXo_ U8 *s, STRLEN len) +{ + return ((CPerlObj*)pPerl)->Perl_is_utf8_string(s, len); +} + #undef Perl_is_utf8_alnum bool Perl_is_utf8_alnum(pTHXo_ U8 *p) @@ -3352,7 +3359,7 @@ Perl_utf8_to_bytes(pTHXo_ U8 *s, STRLEN len) #undef Perl_bytes_to_utf8 U8* -Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN len) +Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len) { return ((CPerlObj*)pPerl)->Perl_bytes_to_utf8(s, len); } diff --git a/pod/perlapi.pod b/pod/perlapi.pod index f274641029..86ad5bd1bb 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -153,9 +153,10 @@ Found in file av.c =item bytes_to_utf8 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN len) + U8 * bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -2281,19 +2282,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h @@ -2938,10 +2939,21 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h +=item U8 *s + +Returns true if first C<len> bytes of the given string form valid a UTF8 +string, false otherwise. + + bool_utf8_string U8 *s(STRLEN len) + +=for hackers +Found in file utf8.c + =item utf8_to_bytes Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. Unlike C<bytes_to_utf8>, this over-writes the original string. +Returns zero on failure after converting as much as possible. U8 * utf8_to_bytes(U8 *s, STRLEN len) diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index ce08134532..6b4e971f97 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -4379,9 +4379,18 @@ L</chomp>, and L</join>.) =item sprintf FORMAT, LIST -Returns a string formatted by the usual C<printf> conventions of the -C library function C<sprintf>. See L<sprintf(3)> or L<printf(3)> -on your system for an explanation of the general principles. +Returns a string formatted by the usual C<printf> conventions of the C +library function C<sprintf>. See below for more details +and see L<sprintf(3)> or L<printf(3)> on your system for an explanation of +the general principles. + +For example: + + # Format number with up to 8 leading zeroes + $result = sprintf("%08d", $number); + + # Round number to 3 digits after decimal point + $rounded = sprintf("%.3f", $number); Perl does its own C<sprintf> formatting--it emulates the C function C<sprintf>, but it doesn't use it (except for floating-point @@ -367,6 +367,7 @@ PERL_CALLCONV U32 Perl_to_uni_upper_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_title_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_lower_lc(pTHX_ U32 c); PERL_CALLCONV int Perl_is_utf8_char(pTHX_ U8 *p); +PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len); PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p); @@ -810,7 +811,7 @@ PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U16* p, U8 *d, I32 bytelen); PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b); PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off); PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN len); -PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN len); +PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len); PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen); PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv); PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv); @@ -2659,7 +2659,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) char *name = GvNAME(sstr); STRLEN len = GvNAMELEN(sstr); sv_upgrade(dstr, SVt_PVGV); - sv_magic(dstr, dstr, '*', name, len); + sv_magic(dstr, dstr, '*', Nullch, 0); GvSTASH(dstr) = (HV*)SvREFCNT_inc(GvSTASH(sstr)); GvNAME(dstr) = savepvn(name, len); GvNAMELEN(dstr) = len; diff --git a/t/lib/peek.t b/t/lib/peek.t index 255512fac5..86fd74a3df 100644 --- a/t/lib/peek.t +++ b/t/lib/peek.t @@ -285,8 +285,6 @@ do_test(17, MG_VIRTUAL = &PL_vtbl_glob MG_TYPE = \'\\*\' MG_OBJ = $ADDR - MG_LEN = 1 - MG_PTR = $ADDR "a" NAME = "a" NAMELEN = 1 GvSTASH = $ADDR\\t"main" @@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s) return len; } +/* +=for apidoc Am|bool_utf8_string|U8 *s|STRLEN len + +Returns true if first C<len> bytes of the given string form valid a UTF8 +string, false otherwise. + +=cut +*/ + +bool +Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) +{ + U8* x=s; + U8* send=s+len; + int c; + while (x < send) { + c = is_utf8_char(x); + x += c; + if (!c || x > send) + return 0; + } + return 1; +} + UV Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) { @@ -227,6 +251,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. Unlike C<bytes_to_utf8>, this over-writes the original string. +Returns zero on failure after converting as much as possible. =cut */ @@ -247,6 +272,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) else { I32 ulen; UV uv = utf8_to_uv(s, &ulen); + if (uv > 255) { + *d = '\0'; + return 0; + } s += ulen; *d++ = (U8)uv; } @@ -256,24 +285,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) } /* -=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len +=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length. =cut */ U8* -Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) +Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len) { dTHR; U8 *send; U8 *d; U8 *dst; - send = s + len; + send = s + (*len); - Newz(801, d, len * 2 + 1, U8); + Newz(801, d, (*len) * 2 + 1, U8); dst = d; while (s < send) { @@ -286,6 +316,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) } } *d = '\0'; + *len = d-dst; return dst; } |