diff options
author | Charles Bailey <bailey@newman.upenn.edu> | 2001-01-08 08:53:52 +0000 |
---|---|---|
committer | bailey <bailey@newman.upenn.edu> | 2001-01-08 08:53:52 +0000 |
commit | 0e06870bf080a38cda51c06c6612359afc2334e1 (patch) | |
tree | 763f11122a3b18bc443e808010b970428ab57432 /ext | |
parent | e3830a4ec012ee625f1b3bc63b5b18c656f377da (diff) | |
download | perl-0e06870bf080a38cda51c06c6612359afc2334e1.tar.gz |
Once again syncing after too long an absence
p4raw-id: //depot/vmsperl@8367
Diffstat (limited to 'ext')
80 files changed, 3662 insertions, 468 deletions
diff --git a/ext/B/B.pm b/ext/B/B.pm index 50364fa1d2..591b5811cd 100644 --- a/ext/B/B.pm +++ b/ext/B/B.pm @@ -9,12 +9,17 @@ package B; use XSLoader (); require Exporter; @ISA = qw(Exporter); + +# walkoptree_slow comes from B.pm (you are there), +# walkoptree comes from B.xs @EXPORT_OK = qw(minus_c ppname save_BEGINs class peekop cast_I32 cstring cchar hash threadsv_names - main_root main_start main_cv svref_2object opnumber amagic_generation - walkoptree walkoptree_slow walkoptree_exec walksymtable + main_root main_start main_cv svref_2object opnumber + amagic_generation + walkoptree_slow walkoptree walkoptree_exec walksymtable parents comppadlist sv_undef compile_stats timing_info begin_av init_av end_av); + sub OPf_KIDS (); use strict; @B::SV::ISA = 'B::OBJECT'; @@ -185,7 +190,7 @@ sub walksymtable { *glob = "*main::".$prefix.$sym; if ($sym =~ /::$/) { $sym = $prefix . $sym; - if ($sym ne "main::" && &$recurse($sym)) { + if ($sym ne "main::" && $sym ne "<none>::" && &$recurse($sym)) { walksymtable(\%glob, $method, $recurse, $sym); } } else { @@ -531,6 +536,8 @@ This method returns TRUE if the GP field of the GV is NULL. =item CvFLAGS +=item const_sv + =back =head2 B::HV METHODS diff --git a/ext/B/B.xs b/ext/B/B.xs index f1f0e65781..ec9e578020 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -1229,6 +1229,12 @@ U16 CvFLAGS(cv) B::CV cv +MODULE = B PACKAGE = B::CV PREFIX = cv_ + +B::SV +cv_const_sv(cv) + B::CV cv + MODULE = B PACKAGE = B::HV PREFIX = Hv diff --git a/ext/B/B/C.pm b/ext/B/B/C.pm index d0c8159d9f..dac9417806 100644 --- a/ext/B/B/C.pm +++ b/ext/B/B/C.pm @@ -1020,7 +1020,6 @@ sub output_all { print <<"EOT"; static int $init_name() { - dTHR; dTARG; djSP; EOT @@ -1338,7 +1337,7 @@ sub should_save # Now see if current package looks like an OO class this is probably too strong. foreach my $m (qw(new DESTROY TIESCALAR TIEARRAY TIEHASH TIEHANDLE)) { - if ($package->can($m)) + if (UNIVERSAL::can($package, $m)) { warn "$package has method $m: saving package\n";#debug return mark_package($package); @@ -1368,7 +1367,7 @@ sub walkpackages if ($sym =~ /::$/) { $sym = $prefix . $sym; - if ($sym ne "main::" && &$recurse($sym)) + if ($sym ne "main::" && $sym ne "<none>::" && &$recurse($sym)) { walkpackages(\%glob, $recurse, $sym); } diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index 5c5c5eb9cb..b0a5eaeb38 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -17,7 +17,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber CVf_METHOD CVf_LOCKED CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); -$VERSION = 0.591; +$VERSION = 0.60; use strict; # Changes between 0.50 and 0.51: @@ -83,6 +83,12 @@ use strict; # - added support for Chip's OP_METHOD_NAMED # - added support for Ilya's OPpTARGET_MY optimization # - elided arrows before `()' subscripts when possible +# Changes between 0.59 and 0.60 +# - support for method attribues was added +# - some warnings fixed +# - separate recognition of constant subs +# - rewrote continue block handling, now recoginizing for loops +# - added more control of expanding control structures # Todo: # - finish tr/// changes @@ -93,8 +99,8 @@ use strict; # - left/right context # - recognize `use utf8', `use integer', etc # - treat top-level block specially for incremental output -# - interpret in high bit chars in string as utf8 \x{...} (when?) -# - copy comments (look at real text with $^P?) +# - interpret high bit chars in string as utf8 \x{...} (when?) +# - copy comments (look at real text with $^P?) # - avoid semis in one-statement blocks # - associativity of &&=, ||=, ?: # - ',' => '=>' (auto-unquote?) @@ -108,7 +114,6 @@ use strict; # - version using op_next instead of op_first/sibling? # - avoid string copies (pass arrays, one big join?) # - auto-apply `-u'? -# - while{} with one-statement continue => for(; XXX; XXX) {}? # - -uPackage:: descend recursively? # - here-docs? # - <DATA>? @@ -357,6 +362,8 @@ sub new { $self->{'unquote'} = 1; } elsif (substr($arg, 0, 2) eq "-s") { $self->style_opts(substr $arg, 2); + } elsif ($arg =~ /^-x(\d)$/) { + $self->{'expand'} = $1; } } return $self; @@ -393,6 +400,7 @@ sub deparse { my $self = shift; my($op, $cx) = @_; # cluck if class($op) eq "NULL"; +# cluck unless $op; # return $self->$ {\("pp_" . $op->name)}($op, $cx); my $meth = "pp_" . $op->name; return $self->$meth($op, $cx); @@ -446,6 +454,11 @@ sub deparse_sub { # skip leavesub return $proto . "{\n\t" . $self->deparse($cv->ROOT->first, 0) . "\n\b}\n"; + } + my $sv = $cv->const_sv; + if ($$sv) { + # uh-oh. inlinable sub... format it differently + return $proto . "{ " . const($sv) . " }\n"; } else { # XSUB? return $proto . "{}\n"; } @@ -679,70 +692,69 @@ sub pp_entertry { # see also leavetry return "XXX"; } -# leave and scope/lineseq should probably share code -sub pp_leave { +sub lineseq { my $self = shift; - my($op, $cx) = @_; - my ($kid, $expr); - my @exprs; - local($self->{'curstash'}) = $self->{'curstash'}; - $kid = $op->first->sibling; # skip enter - if (is_miniwhile($kid)) { - my $top = $kid->first; - my $name = $top->name; - if ($name eq "and") { - $name = "while"; - } elsif ($name eq "or") { - $name = "until"; - } else { # no conditional -> while 1 or until 0 - return $self->deparse($top->first, 1) . " while 1"; - } - my $cond = $top->first; - my $body = $cond->sibling->first; # skip lineseq - $cond = $self->deparse($cond, 1); - $body = $self->deparse($body, 1); - return "$body $name $cond"; - } - for (; !null($kid); $kid = $kid->sibling) { + my(@ops) = @_; + my($expr, @exprs); + for (my $i = 0; $i < @ops; $i++) { $expr = ""; - if (is_state $kid) { - $expr = $self->deparse($kid, 0); - $kid = $kid->sibling; - last if null $kid; + if (is_state $ops[$i]) { + $expr = $self->deparse($ops[$i], 0); + $i++; + last if $i > $#ops; } - $expr .= $self->deparse($kid, 0); + if (!is_state $ops[$i] and $ops[$i+1] and !null($ops[$i+1]) and + $ops[$i+1]->name eq "leaveloop" and $self->{'expand'} < 3) + { + push @exprs, $expr . $self->for_loop($ops[$i], 0); + $i++; + next; + } + $expr .= $self->deparse($ops[$i], 0); push @exprs, $expr if length $expr; } - if ($cx > 0) { # inside an expression - return "do { " . join(";\n", @exprs) . " }"; - } else { - return join(";\n", @exprs) . ";"; - } + return join(";\n", @exprs); } -sub pp_scope { - my $self = shift; - my($op, $cx) = @_; - my ($kid, $expr); - my @exprs; - for ($kid = $op->first; !null($kid); $kid = $kid->sibling) { - $expr = ""; - if (is_state $kid) { - $expr = $self->deparse($kid, 0); - $kid = $kid->sibling; - last if null $kid; +sub scopeop { + my($real_block, $self, $op, $cx) = @_; + my $kid; + my @kids; + local($self->{'curstash'}) = $self->{'curstash'} if $real_block; + if ($real_block) { + $kid = $op->first->sibling; # skip enter + if (is_miniwhile($kid)) { + my $top = $kid->first; + my $name = $top->name; + if ($name eq "and") { + $name = "while"; + } elsif ($name eq "or") { + $name = "until"; + } else { # no conditional -> while 1 or until 0 + return $self->deparse($top->first, 1) . " while 1"; + } + my $cond = $top->first; + my $body = $cond->sibling->first; # skip lineseq + $cond = $self->deparse($cond, 1); + $body = $self->deparse($body, 1); + return "$body $name $cond"; } - $expr .= $self->deparse($kid, 0); - push @exprs, $expr if length $expr; + } else { + $kid = $op->first; + } + for (; !null($kid); $kid = $kid->sibling) { + push @kids, $kid; } if ($cx > 0) { # inside an expression, (a do {} while for lineseq) - return "do { " . join(";\n", @exprs) . " }"; + return "do { " . $self->lineseq(@kids) . " }"; } else { - return join(";\n", @exprs) . ";"; + return $self->lineseq(@kids) . ";"; } } -sub pp_lineseq { pp_scope(@_) } +sub pp_scope { scopeop(0, @_); } +sub pp_lineseq { scopeop(0, @_); } +sub pp_leave { scopeop(1, @_); } # The BEGIN {} is used here because otherwise this code isn't executed # when you run B::Deparse on itself. @@ -1380,11 +1392,14 @@ sub logop { my ($op, $cx, $lowop, $lowprec, $highop, $highprec, $blockname) = @_; my $left = $op->first; my $right = $op->first->sibling; - if ($cx == 0 and is_scope($right) and $blockname) { # if ($a) {$b} + if ($cx == 0 and is_scope($right) and $blockname + and $self->{'expand'} < 7) + { # if ($a) {$b} $left = $self->deparse($left, 1); $right = $self->deparse($right, 0); return "$blockname ($left) {\n\t$right\n\b}\cK"; - } elsif ($cx == 0 and $blockname and not $self->{'parens'}) { # $b if $a + } elsif ($cx == 0 and $blockname and not $self->{'parens'} + and $self->{'expand'} < 7) { # $b if $a $right = $self->deparse($right, 1); $left = $self->deparse($left, 1); return "$right $blockname $left"; @@ -1675,7 +1690,8 @@ sub pp_cond_expr { my $false = $true->sibling; my $cuddle = $self->{'cuddle'}; unless ($cx == 0 and (is_scope($true) and $true->name ne "null") and - (is_scope($false) || is_ifelse_cont($false))) { + (is_scope($false) || is_ifelse_cont($false)) + and $self->{'expand'} < 7) { $cond = $self->deparse($cond, 8); $true = $self->deparse($true, 8); $false = $self->deparse($false, 8); @@ -1704,20 +1720,24 @@ sub pp_cond_expr { return $head . join($cuddle, "", @elsifs) . $false; } -sub pp_leaveloop { +sub loop_common { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $init) = @_; my $enter = $op->first; my $kid = $enter->sibling; local($self->{'curstash'}) = $self->{'curstash'}; my $head = ""; my $bare = 0; + my $body; + my $cond = undef; if ($kid->name eq "lineseq") { # bare or infinite loop if (is_state $kid->last) { # infinite $head = "for (;;) "; # shorter than while (1) + $cond = ""; } else { $bare = 1; } + $body = $kid; } elsif ($enter->name eq "enteriter") { # foreach my $ary = $enter->first->sibling; # first was pushmark my $var = $ary->sibling; @@ -1749,62 +1769,60 @@ sub pp_leaveloop { $var = "\$" . $self->deparse($var, 1); } $head = "foreach $var ($ary) "; - $kid = $kid->first->first->sibling; # skip OP_AND and OP_ITER + $body = $kid->first->first->sibling; # skip OP_AND and OP_ITER } elsif ($kid->name eq "null") { # while/until $kid = $kid->first; - my $name = {"and" => "while", "or" => "until"} - ->{$kid->name}; - $head = "$name (" . $self->deparse($kid->first, 1) . ") "; - $kid = $kid->first->sibling; + my $name = {"and" => "while", "or" => "until"}->{$kid->name}; + $cond = $self->deparse($kid->first, 1); + $head = "$name ($cond) "; + $body = $kid->first->sibling; } elsif ($kid->name eq "stub") { # bare and empty return "{;}"; # {} could be a hashref } - # The third-to-last kid is the continue block if the pointer used - # by `next BLOCK' points to its first OP, which happens to be the - # the op_next of the head of the _previous_ statement. - # Unless it's a bare loop, in which case it's last, since there's - # no unstack or extra nextstate. - # Except if the previous head isn't null but the first kid is - # (because it's a nulled out nextstate in a scope), in which - # case the head's next is advanced past the null but the nextop's - # isn't, so we need to try nextop->next. - my $precont; - my $cont = $kid->first; - if ($bare) { - while (!null($cont->sibling)) { - $precont = $cont; - $cont = $cont->sibling; - } - } else { - while (!null($cont->sibling->sibling->sibling)) { - $precont = $cont; - $cont = $cont->sibling; + # If there isn't a continue block, then the next pointer for the loop + # will point to the unstack, which is kid's penultimate child, except + # in a bare loop, when it will point to the leaveloop. When neither of + # these conditions hold, then the third-to-last child in the continue + # block (or the last in a bare loop). + my $cont_start = $enter->nextop; + my $cont; + if ($$cont_start != $$op and $ {$cont_start->sibling} != $ {$body->last}) { + if ($bare) { + $cont = $body->last; + } else { + $cont = $body->first; + while (!null($cont->sibling->sibling->sibling)) { + $cont = $cont->sibling; + } + } + my $state = $body->first; + my $cuddle = $self->{'cuddle'}; + my @states; + for (; $$state != $$cont; $state = $state->sibling) { + push @states, $state; + } + $body = $self->lineseq(@states); + if (defined $cond and not is_scope $cont and $self->{'expand'} < 3) { + $head = "for ($init; $cond; " . $self->deparse($cont, 1) .") "; + $cont = "\cK"; + } else { + $cont = $cuddle . "continue {\n\t" . + $self->deparse($cont, 0) . "\n\b}\cK"; } - } - if ($precont and $ {$precont->next} == $ {$enter->nextop} - || $ {$precont->next} == $ {$enter->nextop->next} ) - { - my $state = $kid->first; - my $cuddle = $self->{'cuddle'}; - my($expr, @exprs); - for (; $$state != $$cont; $state = $state->sibling) { - $expr = ""; - if (is_state $state) { - $expr = $self->deparse($state, 0); - $state = $state->sibling; - last if null $state; - } - $expr .= $self->deparse($state, 0); - push @exprs, $expr if $expr; - } - $kid = join(";\n", @exprs); - $cont = $cuddle . "continue {\n\t" . - $self->deparse($cont, 0) . "\n\b}\cK"; } else { $cont = "\cK"; - $kid = $self->deparse($kid, 0); + $body = $self->deparse($body, 0); } - return $head . "{\n\t" . $kid . "\n\b}" . $cont; + return $head . "{\n\t" . $body . "\n\b}" . $cont; +} + +sub pp_leaveloop { loop_common(@_, "") } + +sub for_loop { + my $self = shift; + my($op, $cx) = @_; + my $init = $self->deparse($op, 1); + return $self->loop_common($op->sibling, $cx, $init); } sub pp_leavetry { @@ -2851,8 +2869,8 @@ B::Deparse - Perl compiler backend to produce perl code =head1 SYNOPSIS -B<perl> B<-MO=Deparse>[B<,-u>I<PACKAGE>][B<,-p>][B<,-q>][B<,-l>][B<,-s>I<LETTERS>] - I<prog.pl> +B<perl> B<-MO=Deparse>[B<,-u>I<PACKAGE>][B<,-p>][B<,-q>][B<,-l>] + [B<,-s>I<LETTERS>][B<,-x>I<LEVEL>] I<prog.pl> =head1 DESCRIPTION @@ -2997,6 +3015,55 @@ file is compiled as a main program. =back +=item B<-x>I<LEVEL> + +Expand conventional syntax constructions into equivalent ones that expose +their internal operation. I<LEVEL> should be a digit, with higher values +meaning more expansion. As with B<-q>, this actually involves turning off +special cases in B::Deparse's normal operations. + +If I<LEVEL> is at least 3, for loops will be translated into equivalent +while loops with continue blocks; for instance + + for ($i = 0; $i < 10; ++$i) { + print $i; + } + +turns into + + $i = 0; + while ($i < 10) { + print $i; + } continue { + ++$i + } + +Note that in a few cases this translation can't be perfectly carried back +into the source code -- if the loop's initializer declares a my variable, +for instance, it won't have the correct scope outside of the loop. + +If I<LEVEL> is at least 7, if statements will be translated into equivalent +expressions using C<&&>, C<?:> and C<do {}>; for instance + + print 'hi' if $nice; + if ($nice) { + print 'hi'; + } + if ($nice) { + print 'hi'; + } else { + print 'bye'; + } + +turns into + + $nice and print 'hi'; + $nice and do { print 'hi' }; + $nice ? do { print 'hi' } : do { print 'bye' }; + +Long sequences of elsifs will turn into nested ternary operators, which +B::Deparse doesn't know how to indent nicely. + =back =head1 USING B::Deparse AS A MODULE @@ -3043,7 +3110,7 @@ See the 'to do' list at the beginning of the module file. =head1 AUTHOR -Stephen McCamant <smccam@uclink4.berkeley.edu>, based on an earlier +Stephen McCamant <smcc@CSUA.Berkeley.EDU>, based on an earlier version by Malcolm Beattie <mbeattie@sable.ox.ac.uk>, with contributions from Gisle Aas, James Duncan, Albert Dvornik, Hugo van der Sanden, Gurusamy Sarathy, and Nick Ing-Simmons. diff --git a/ext/B/B/Lint.pm b/ext/B/B/Lint.pm index ed0d07dfcb..094b3cf8fd 100644 --- a/ext/B/B/Lint.pm +++ b/ext/B/B/Lint.pm @@ -116,7 +116,7 @@ Malcolm Beattie, mbeattie@sable.ox.ac.uk. =cut use strict; -use B qw(walkoptree_slow main_root walksymtable svref_2object parents +use B qw(walkoptree main_root walksymtable svref_2object parents OPf_WANT_LIST OPf_WANT OPf_STACKED G_ARRAY ); @@ -277,12 +277,12 @@ sub B::GV::lintcv { return if !$$cv || $done_cv{$$cv}++; my $root = $cv->ROOT; #warn " root = $root (0x$$root)\n";#debug - walkoptree_slow($root, "lint") if $$root; + walkoptree($root, "lint") if $$root; } sub do_lint { my %search_pack; - walkoptree_slow(main_root, "lint") if ${main_root()}; + walkoptree(main_root, "lint") if ${main_root()}; # Now do subs in main no strict qw(vars refs); diff --git a/ext/B/B/Terse.pm b/ext/B/B/Terse.pm index 66b5cfc2f2..a7a071e937 100644 --- a/ext/B/B/Terse.pm +++ b/ext/B/B/Terse.pm @@ -1,6 +1,6 @@ package B::Terse; use strict; -use B qw(peekop class walkoptree_slow walkoptree_exec +use B qw(peekop class walkoptree walkoptree_exec walkoptree_slow main_start main_root cstring svref_2object); use B::Asmdata qw(@specialsv_name); diff --git a/ext/ByteLoader/ByteLoader.xs b/ext/ByteLoader/ByteLoader.xs index d3b435199e..05b795ca25 100644 --- a/ext/ByteLoader/ByteLoader.xs +++ b/ext/ByteLoader/ByteLoader.xs @@ -77,7 +77,6 @@ bl_read(struct byteloader_fdata *data, char *buf, size_t size, size_t n) static I32 byteloader_filter(pTHXo_ int idx, SV *buf_sv, int maxlen) { - dTHR; OP *saveroot = PL_main_root; OP *savestart = PL_main_start; struct byteloader_state bstate; diff --git a/ext/ByteLoader/byterun.c b/ext/ByteLoader/byterun.c index 19f1f6b44c..3e12790fb0 100644 --- a/ext/ByteLoader/byterun.c +++ b/ext/ByteLoader/byterun.c @@ -54,7 +54,6 @@ bset_obj_store(pTHXo_ struct byteloader_state *bstate, void *obj, I32 ix) void byterun(pTHXo_ register struct byteloader_state *bstate) { - dTHR; register int insn; U32 ix; SV *specialsv_list[6]; diff --git a/ext/DB_File/Changes b/ext/DB_File/Changes index ad54382d63..eda270d82b 100644 --- a/ext/DB_File/Changes +++ b/ext/DB_File/Changes @@ -292,7 +292,45 @@ the updates to the documentation and writing DB_File::Lock (available on CPAN). -1.73 27th April 2000 +1.73 31st May 2000 * Added support in version.c for building with threaded Perl. + * Berkeley DB 3.1 has reenabled support for null keys. The test + harness has been updated to reflect this. + +1.74 10th December 2000 + + * A "close" call in DB_File.xs needed parenthesised to stop win32 from + thinking it was one of its macros. + + * Updated dbinfo to support Berkeley DB 3.1 file format changes. + + * DB_File.pm & the test hasness now use the warnings pragma (when + available). + + * Included Perl core patch 7703 -- size argument for hash_cb is different + for Berkeley DB 3.x + + * Included Perl core patch 7801 -- Give __getBerkeleyDBInfo the ANSI C + treatment. + + * @a = () produced the warning 'Argument "" isn't numeric in entersub' + This has been fixed. Thanks to Edward Avis for spotting this bug. + + * Added note about building under Linux. Included patches. + + * Included Perl core patch 8068 -- fix for bug 20001013.009 + When run with warnings enabled "$hash{XX} = undef " produced an + "Uninitialized value" warning. This has been fixed. + +1.75 17th December 2000 + + * Fixed perl core patch 7703 + + * Added suppport to allow DB_File to be built with Berkeley DB 3.2 -- + btree_compare, btree_prefix and hash_cb needed to be changed. + + * Updated dbinfo to support Berkeley DB 3.2 file format changes. + + diff --git a/ext/DB_File/DB_File.pm b/ext/DB_File/DB_File.pm index a1ec0e6362..c8302168f8 100644 --- a/ext/DB_File/DB_File.pm +++ b/ext/DB_File/DB_File.pm @@ -1,8 +1,8 @@ # DB_File.pm -- Perl 5 interface to Berkeley DB # # written by Paul Marquess (Paul.Marquess@btinternet.com) -# last modified 26th April 2000 -# version 1.73 +# last modified 17th December 2000 +# version 1.75 # # Copyright (c) 1995-2000 Paul Marquess. All rights reserved. # This program is free software; you can redistribute it and/or @@ -13,6 +13,7 @@ package DB_File::HASHINFO ; require 5.003 ; +use warnings; use strict; use Carp; require Tie::Hash; @@ -104,6 +105,7 @@ sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } package DB_File::RECNOINFO ; +use warnings; use strict ; @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; @@ -121,6 +123,7 @@ sub TIEHASH package DB_File::BTREEINFO ; +use warnings; use strict ; @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; @@ -140,6 +143,7 @@ sub TIEHASH package DB_File ; +use warnings; use strict; use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version $use_XSLoader @@ -147,7 +151,7 @@ use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO use Carp; -$VERSION = "1.73" ; +$VERSION = "1.75" ; #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; $DB_BTREE = new DB_File::BTREEINFO ; @@ -271,7 +275,7 @@ sub TIEARRAY sub CLEAR { my $self = shift; - my $key = "" ; + my $key = 0 ; my $value = "" ; my $status = $self->seq($key, $value, R_FIRST()); my @keys; @@ -665,6 +669,7 @@ This example shows how to create a database, add key/value pairs to the database, delete keys/value pairs and finally how to enumerate the contents of the database. + use warnings ; use strict ; use DB_File ; use vars qw( %h $k $v ) ; @@ -715,6 +720,7 @@ This script shows how to override the default sorting algorithm that BTREE uses. Instead of using the normal lexical ordering, a case insensitive compare function will be used. + use warnings ; use strict ; use DB_File ; @@ -783,6 +789,7 @@ There are some difficulties in using the tied hash interface if you want to manipulate a BTREE database with duplicate keys. Consider this code: + use warnings ; use strict ; use DB_File ; @@ -837,6 +844,7 @@ and the API in general. Here is the script above rewritten using the C<seq> API method. + use warnings ; use strict ; use DB_File ; @@ -908,6 +916,7 @@ particular value occurred in the BTREE. So assuming the database created above, we can use C<get_dup> like this: + use warnings ; use strict ; use DB_File ; @@ -957,6 +966,7 @@ returns 0. Otherwise the method returns a non-zero value. Assuming the database from the previous example: + use warnings ; use strict ; use DB_File ; @@ -995,6 +1005,7 @@ Otherwise the method returns a non-zero value. Again assuming the existence of the C<tree> database + use warnings ; use strict ; use DB_File ; @@ -1039,6 +1050,7 @@ the use of the R_CURSOR flag with seq: In the example script below, the C<match> sub uses this feature to find and print the first matching key/value pair given a partial key. + use warnings ; use strict ; use DB_File ; use Fcntl ; @@ -1143,6 +1155,7 @@ Here is a simple example that uses RECNO (if you are using a version of Perl earlier than 5.004_57 this example won't work -- see L<Extra RECNO Methods> for a workaround). + use warnings ; use strict ; use DB_File ; @@ -1232,6 +1245,7 @@ Here is a more complete example that makes use of some of the methods described above. It also makes use of the API interface directly (see L<THE API INTERFACE>). + use warnings ; use strict ; use vars qw(@h $H $file $i) ; use DB_File ; @@ -1583,6 +1597,7 @@ the database and have them removed when you read from the database. As I'm sure you have already guessed, this is a problem that DBM Filters can fix very easily. + use warnings ; use strict ; use DB_File ; @@ -1625,6 +1640,7 @@ when reading. Here is a DBM Filter that does it: + use warnings ; use strict ; use DB_File ; my %hash ; @@ -1791,6 +1807,7 @@ Here is a snippet of code that is loosely based on Tom Christiansen's I<ggh> script (available from your nearest CPAN archive in F<authors/id/TOMC/scripts/nshist.gz>). + use warnings ; use strict ; use DB_File ; use Fcntl ; @@ -1947,6 +1964,7 @@ You will encounter this particular error message when you have the C<strict 'subs'> pragma (or the full strict pragma) in your script. Consider this script: + use warnings ; use strict ; use DB_File ; use vars qw(%x) ; diff --git a/ext/DB_File/DB_File.xs b/ext/DB_File/DB_File.xs index cb8fd80385..fa3bb336c2 100644 --- a/ext/DB_File/DB_File.xs +++ b/ext/DB_File/DB_File.xs @@ -3,8 +3,8 @@ DB_File.xs -- Perl 5 interface to Berkeley DB written by Paul Marquess <Paul.Marquess@btinternet.com> - last modified 27th April 2000 - version 1.73 + last modified 17 December 2000 + version 1.75 All comments/suggestions/problems are welcome @@ -83,6 +83,13 @@ Rewrote push 1.72 - No change to DB_File.xs 1.73 - No change to DB_File.xs + 1.74 - A call to open needed parenthesised to stop it clashing + with a win32 macro. + Added Perl core patches 7703 & 7801. + 1.75 - Fixed Perl core patch 7703. + Added suppport to allow DB_File to be built with + Berkeley DB 3.2 -- btree_compare, btree_prefix and hash_cb + needed to be changed. */ @@ -128,6 +135,10 @@ # include <db.h> #endif +#ifdef CAN_PROTOTYPE +extern void __getBerkeleyDBInfo(void); +#endif + #ifndef pTHX # define pTHX # define pTHX_ @@ -159,6 +170,10 @@ # define BERKELEY_DB_1_OR_2 #endif +#if DB_VERSION_MAJOR > 3 || (DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR >= 2) +# define AT_LEAST_DB_3_2 +#endif + /* map version 2 features & constants onto their version 1 equivalent */ #ifdef DB_Prefix_t @@ -244,6 +259,7 @@ typedef db_recno_t recno_t; #else /* db version 1.x */ +#define BERKELEY_DB_1 #define BERKELEY_DB_1_OR_2 typedef union INFO { @@ -473,6 +489,19 @@ u_int flags ; static int +#ifdef AT_LEAST_DB_3_2 + +#ifdef CAN_PROTOTYPE +btree_compare(DB * db, const DBT *key1, const DBT *key2) +#else +btree_compare(db, key1, key2) +DB * db ; +const DBT * key1 ; +const DBT * key2 ; +#endif /* CAN_PROTOTYPE */ + +#else /* Berkeley DB < 3.2 */ + #ifdef CAN_PROTOTYPE btree_compare(const DBT *key1, const DBT *key2) #else @@ -480,6 +509,9 @@ btree_compare(key1, key2) const DBT * key1 ; const DBT * key2 ; #endif + +#endif + { #ifdef dTHX dTHX; @@ -529,6 +561,19 @@ const DBT * key2 ; } static DB_Prefix_t +#ifdef AT_LEAST_DB_3_2 + +#ifdef CAN_PROTOTYPE +btree_prefix(DB * db, const DBT *key1, const DBT *key2) +#else +btree_prefix(db, key1, key2) +Db * db ; +const DBT * key1 ; +const DBT * key2 ; +#endif + +#else /* Berkeley DB < 3.2 */ + #ifdef CAN_PROTOTYPE btree_prefix(const DBT *key1, const DBT *key2) #else @@ -536,6 +581,8 @@ btree_prefix(key1, key2) const DBT * key1 ; const DBT * key2 ; #endif + +#endif { #ifdef dTHX dTHX; @@ -584,13 +631,35 @@ const DBT * key2 ; return (retval) ; } + +#ifdef BERKELEY_DB_1 +# define HASH_CB_SIZE_TYPE size_t +#else +# define HASH_CB_SIZE_TYPE u_int32_t +#endif + static DB_Hash_t +#ifdef AT_LEAST_DB_3_2 + #ifdef CAN_PROTOTYPE -hash_cb(const void *data, size_t size) +hash_cb(DB * db, const void *data, u_int32_t size) +#else +hash_cb(db, data, size) +DB * db ; +const void * data ; +HASH_CB_SIZE_TYPE size ; +#endif + +#else /* Berkeley DB < 3.2 */ + +#ifdef CAN_PROTOTYPE +hash_cb(const void *data, HASH_CB_SIZE_TYPE size) #else hash_cb(data, size) const void * data ; -size_t size ; +HASH_CB_SIZE_TYPE size ; +#endif + #endif { #ifdef dTHX @@ -1266,7 +1335,7 @@ SV * sv ; Flags |= DB_TRUNCATE ; #endif - status = RETVAL->dbp->open(RETVAL->dbp, name, NULL, RETVAL->type, + status = (RETVAL->dbp->open)(RETVAL->dbp, name, NULL, RETVAL->type, Flags, mode) ; /* printf("open returned %d %s\n", status, db_strerror(status)) ; */ diff --git a/ext/DB_File/dbinfo b/ext/DB_File/dbinfo index 701ac612b6..5a4df15907 100644 --- a/ext/DB_File/dbinfo +++ b/ext/DB_File/dbinfo @@ -4,10 +4,10 @@ # a database file # # Author: Paul Marquess <Paul.Marquess@btinternet.com> -# Version: 1.02 -# Date 20th August 1999 +# Version: 1.03 +# Date 17th September 2000 # -# Copyright (c) 1998 Paul Marquess. All rights reserved. +# Copyright (c) 1998-2000 Paul Marquess. All rights reserved. # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. @@ -28,7 +28,8 @@ my %Data = 4 => "Unknown", 5 => "2.0.0 -> 2.3.0", 6 => "2.3.1 -> 2.7.7", - 7 => "3.0.0 or greater", + 7 => "3.0.x", + 8 => "3.1.x or greater", } }, 0x061561 => { @@ -40,14 +41,17 @@ my %Data = 3 => "1.86", 4 => "2.0.0 -> 2.1.0", 5 => "2.2.6 -> 2.7.7", - 6 => "3.0.0 or greater", + 6 => "3.0.x", + 7 => "3.1.x or greater", } }, 0x042253 => { Type => "Queue", Versions => { - 1 => "3.0.0 or greater", + 1 => "3.0.x", + 2 => "3.1.x", + 3 => "3.2.x or greater", } }, ) ; @@ -86,7 +90,7 @@ else { die "not a Berkeley DB database file.\n" } my $type = $Data{$magic} ; -my $magic = sprintf "%06X", $magic ; +$magic = sprintf "%06X", $magic ; my $ver_string = "Unknown" ; $ver_string = $type->{Versions}{$version} diff --git a/ext/DB_File/typemap b/ext/DB_File/typemap index 41a24f4a86..55439ee76d 100644 --- a/ext/DB_File/typemap +++ b/ext/DB_File/typemap @@ -1,8 +1,8 @@ # typemap for Perl 5 interface to Berkeley # # written by Paul Marquess <Paul.Marquess@btinternet.com> -# last modified 7th September 1999 -# version 1.71 +# last modified 10th December 2000 +# version 1.74 # #################################### DB SECTION # @@ -29,9 +29,10 @@ T_dbtkeydatum T_dbtdatum ckFilter($arg, filter_store_value, \"filter_store_value\"); DBT_clear($var) ; - $var.data = SvPV($arg, PL_na); - $var.size = (int)PL_na; - + if (SvOK($arg)) { + $var.data = SvPV($arg, PL_na); + $var.size = (int)PL_na; + } OUTPUT diff --git a/ext/DB_File/version.c b/ext/DB_File/version.c index f3e2c947c8..6e55b2e3d1 100644 --- a/ext/DB_File/version.c +++ b/ext/DB_File/version.c @@ -17,6 +17,8 @@ Support for Berkeley DB 2/3's backward compatability mode. 1.72 - No change. 1.73 - Added support for threading + 1.74 - Added Perl core patch 7801. + */ @@ -27,7 +29,11 @@ #include <db.h> void +#ifdef CAN_PROTOTYPE +__getBerkeleyDBInfo(void) +#else __getBerkeleyDBInfo() +#endif { #ifdef dTHX dTHX; diff --git a/ext/Devel/DProf/DProf.xs b/ext/Devel/DProf/DProf.xs index 7167a0028f..8f28c6eb33 100644 --- a/ext/Devel/DProf/DProf.xs +++ b/ext/Devel/DProf/DProf.xs @@ -3,11 +3,6 @@ #include "perl.h" #include "XSUB.h" -/* For older Perls */ -#ifndef dTHR -# define dTHR int dummy_thr -#endif /* dTHR */ - /*#define DBG_SUB 1 */ /*#define DBG_TIMER 1 */ @@ -388,7 +383,6 @@ prof_mark(pTHX_ opcode ptype) static void test_time(pTHX_ clock_t *r, clock_t *u, clock_t *s) { - dTHR; CV *cv = perl_get_cv("Devel::DProf::NONESUCH_noxs", FALSE); int i, j, k = 0; HV *oldstash = PL_curstash; diff --git a/ext/Devel/Peek/Peek.xs b/ext/Devel/Peek/Peek.xs index e5fc8ae2c9..312f5f84ba 100644 --- a/ext/Devel/Peek/Peek.xs +++ b/ext/Devel/Peek/Peek.xs @@ -140,6 +140,7 @@ struct mstats_buffer void _fill_mstats(struct mstats_buffer *b, int level) { + dTHX; b->buffer.nfree = b->buf; b->buffer.ntotal = b->buf + _NBUCKETS; b->buffer.bucket_mem_size = b->buf + 2*_NBUCKETS; @@ -151,6 +152,7 @@ _fill_mstats(struct mstats_buffer *b, int level) void fill_mstats(SV *sv, int level) { + dTHX; int nbuckets; struct mstats_buffer buf; @@ -166,6 +168,7 @@ fill_mstats(SV *sv, int level) void _mstats_to_hv(HV *hv, struct mstats_buffer *b, int level) { + dTHX; SV **svp; int type; diff --git a/ext/DynaLoader/DynaLoader_pm.PL b/ext/DynaLoader/DynaLoader_pm.PL index b7b45d8372..266c9d030f 100644 --- a/ext/DynaLoader/DynaLoader_pm.PL +++ b/ext/DynaLoader/DynaLoader_pm.PL @@ -1,4 +1,3 @@ - use Config; sub to_string { @@ -12,7 +11,7 @@ unlink "DynaLoader.pm" if -f "DynaLoader.pm"; open OUT, ">DynaLoader.pm" or die $!; print OUT <<'EOT'; -# Generated from DynaLoader.pm.PL (resolved %Config::Config values) +# Generated from DynaLoader.pm.PL package DynaLoader; @@ -28,11 +27,15 @@ package DynaLoader; # # Tim.Bunce@ig.co.uk, August 1994 -$VERSION = "1.04"; # avoid typo warning +use vars qw($VERSION *AUTOLOAD); + +$VERSION = 1.04; # avoid typo warning require AutoLoader; *AUTOLOAD = \&AutoLoader::AUTOLOAD; +use Config; + # The following require can't be removed during maintenance # releases, sadly, because of the risk of buggy code that does # require Carp; Carp::croak "..."; without brackets dying @@ -40,7 +43,6 @@ require AutoLoader; # We'll let those bugs get found on the development track. require Carp if $] < 5.00450; - # enable debug/trace messages from DynaLoader perl code $dl_debug = $ENV{PERL_DL_DEBUG} || 0 unless defined $dl_debug; @@ -71,48 +73,112 @@ print OUT <<'EOT'; # (VMS support by Charles Bailey <bailey@HMIVAX.HUMGEN.UPENN.EDU>) # See dl_expandspec() for more details. Should be harmless but # inefficient to define on systems that don't need it. -$do_expand = $Is_VMS = $^O eq 'VMS'; +$Is_VMS = $^O eq 'VMS'; +$do_expand = $Is_VMS; $Is_MacOS = $^O eq 'MacOS'; @dl_require_symbols = (); # names of symbols we need @dl_resolve_using = (); # names of files to link with @dl_library_path = (); # path to look for files -#@dl_librefs = (); # things we have loaded -#@dl_modules = (); # Modules we have loaded +@dl_librefs = (); # things we have loaded +@dl_modules = (); # Modules we have loaded # This is a fix to support DLD's unfortunate desire to relink -lc @dl_resolve_using = dl_findfile('-lc') if $dlsrc eq "dl_dld.xs"; -# Initialise @dl_library_path with the 'standard' library path -# for this platform as determined by Configure +EOT -# push(@dl_library_path, split(' ', $Config::Config{'libpth'}); +my $cfg_dl_library_path = <<'EOT'; +push(@dl_library_path, split(' ', $Config::Config{libpth})); EOT -print OUT "push(\@dl_library_path, split(' ', ", - to_string($Config::Config{'libpth'}), "));\n"; +sub dquoted_comma_list { + join(", ", map {qq("$_")} @_); +} -print OUT <<'EOT'; +if ($ENV{PERL_BUILD_EXPAND_CONFIG_VARS}) { + eval $cfg_dl_library_path; + if (!$ENV{PERL_BUILD_EXPAND_ENV_VARS}) { + my $dl_library_path = dquoted_comma_list(@dl_library_path); + print OUT <<EOT; +# The below \@dl_library_path has been expanded (%Config) in Perl build time. + +\@dl_library_path = ($dl_library_path); + +EOT + } +} +else { + print OUT <<EOT; +# Initialise \@dl_library_path with the 'standard' library path +# for this platform as determined by Configure. + +$cfg_dl_library_path + +EOT +} + +my $ldlibpthname; +my $ldlibpthname_defined; +my $pthsep; + +if ($ENV{PERL_BUILD_EXPAND_CONFIG_VARS}) { + $ldlibpthname = $Config::Config{ldlibpthname}; + $ldlibpthname_defined = defined $Config::Config{ldlibpthname} ? 1 : 0; + $pthsep = $Config::Config{path_sep}; +} +else { + $ldlibpthname = q($Config::Config{ldlibpthname}); + $ldlibpthname_defined = q(defined $Config::Config{ldlibpthname}); + $pthsep = q($Config::Config{path_sep}); + print OUT <<EOT; +my \$ldlibpthname = $ldlibpthname; +my \$ldlibpthname_defined = $ldlibpthname_defined; +my \$pthsep = $pthsep; + +EOT +} + +my $env_dl_library_path = <<'EOT'; +if ($ldlibpthname_defined && + exists $ENV{$ldlibpthname}) { + push(@dl_library_path, split(/$pthsep/, $ENV{$ldlibpthname})); +} -# Add to @dl_library_path any extra directories we can gather -# from environment variables. -if ($Is_MacOS) { - push(@dl_library_path, split(/,/, $ENV{LD_LIBRARY_PATH})) - if exists $ENV{LD_LIBRARY_PATH}; -} else { - push(@dl_library_path, split(/:/, $ENV{$Config::Config{ldlibpthname}})) - if exists $Config::Config{ldlibpthname} && - $Config::Config{ldlibpthname} ne '' && - exists $ENV{$Config::Config{ldlibpthname}} ;; - push(@dl_library_path, split(/:/, $ENV{$Config::Config{ldlibpthname}})) - if exists $Config::Config{ldlibpthname} && - $Config::Config{ldlibpthname} ne '' && - exists $ENV{$Config::Config{ldlibpthname}} ;; # E.g. HP-UX supports both its native SHLIB_PATH *and* LD_LIBRARY_PATH. -push(@dl_library_path, split(/:/, $ENV{LD_LIBRARY_PATH})) - if exists $ENV{LD_LIBRARY_PATH}; + +if ($ldlibpthname_defined && + $ldlibpthname ne 'LD_LIBRARY_PATH' && + exists $ENV{LD_LIBRARY_PATH}) { + push(@dl_library_path, split(/$pthsep/, $ENV{LD_LIBRARY_PATH})); } +EOT + +if ($ENV{PERL_BUILD_EXPAND_CONFIG_VARS} && $ENV{PERL_BUILD_EXPAND_ENV_VARS}) { + eval $env_dl_library_path; +} +else { + print OUT <<EOT; +# Add to \@dl_library_path any extra directories we can gather from environment +# during runtime. + +$env_dl_library_path +EOT +} + +if ($ENV{PERL_BUILD_EXPAND_CONFIG_VARS} && $ENV{PERL_BUILD_EXPAND_ENV_VARS}) { + my $dl_library_path = dquoted_comma_list(@dl_library_path); + print OUT <<EOT; +# The below \@dl_library_path has been expanded (%Config, %ENV) +# in Perl build time. + +\@dl_library_path = ($dl_library_path); + +EOT +} + +print OUT <<'EOT'; # No prizes for guessing why we don't say 'bootstrap DynaLoader;' here. # NOTE: All dl_*.xs (including dl_none.xs) define a dl_error() XSUB boot_DynaLoader('DynaLoader') if defined(&boot_DynaLoader) && @@ -198,7 +264,7 @@ sub bootstrap { croak("Can't locate loadable object for module $module in \@INC (\@INC contains: @INC)") unless $file; # wording similar to error from 'require' - $file = uc($file) if $Is_VMS && $Config{d_vms_case_sensitive_symbols}; + $file = uc($file) if $Is_VMS && $Config::Config{d_vms_case_sensitive_symbols}; my $bootname = "boot_$module"; $bootname =~ s/\W/_/g; @dl_require_symbols = ($bootname); @@ -326,7 +392,7 @@ print OUT <<'EOT'; # (this is a more complicated issue than it first appears) if (m:/: && -d $_) { push(@dirs, $_); next; } - # VMS: we may be using native VMS directry syntax instead of + # VMS: we may be using native VMS directory syntax instead of # Unix emulation, so check this as well if ($Is_VMS && /[:>\]]/ && -d $_) { push(@dirs, $_); next; } diff --git a/ext/DynaLoader/dl_aix.xs b/ext/DynaLoader/dl_aix.xs index d6acc689af..89b84396ef 100644 --- a/ext/DynaLoader/dl_aix.xs +++ b/ext/DynaLoader/dl_aix.xs @@ -11,6 +11,8 @@ * on statup... It can probably be trimmed more. */ +#define PERLIO_NOT_STDIO 0 + /* * @(#)dlfcn.c 1.5 revision of 93/02/14 20:14:17 * This is an unpublished work copyright (c) 1992 Helios Software GmbH @@ -87,14 +89,6 @@ # define FREAD(p,s,n,ldptr) fread(p,s,n,IOPTR(ldptr)) #endif -/* If using PerlIO, redefine these macros from <ldfcn.h> */ -#ifdef USE_PERLIO -#undef FSEEK -#undef FREAD -#define FSEEK(ldptr,o,p) PerlIO_seek(IOPTR(ldptr),(p==BEGINNING)?(OFFSET(ldptr)+o):o,p) -#define FREAD(p,s,n,ldptr) PerlIO_read(IOPTR(ldptr),p,s*n) -#endif - /* * We simulate dlopen() et al. through a call to load. Because AIX has * no call to find an exported symbol we read the loader section of the @@ -532,11 +526,7 @@ static int readExports(ModulePtr mp) } /* This first case is a hack, since it assumes that the 3rd parameter to FREAD is 1. See the redefinition of FREAD above to see how this works. */ -#ifdef USE_PERLIO - if (FREAD(ldbuf, sh.s_size, 1, ldp) != sh.s_size) { -#else if (FREAD(ldbuf, sh.s_size, 1, ldp) != 1) { -#endif errvalid++; strcpy(errbuf, "readExports: cannot read loader section"); safefree(ldbuf); diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 8ba7232d5e..1f4ffb1485 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -22,6 +22,10 @@ require Exporter; off_utf8 utf_to_utf encodings + utf8_decode + utf8_encode + utf8_upgrade + utf8_downgrade ); bootstrap Encode (); @@ -340,9 +344,9 @@ sub from_to return length($_[0] = $string); } -my %encoding = ( Unicode => bless({},'Encode::Unicode'), - 'iso10646-1' => bless({},'Encode::iso10646_1'), - ); +# The global hash is declared in XS code +$encoding{Unicode} = bless({},'Encode::Unicode'); +$encoding{'iso10646-1'} = bless({},'Encode::iso10646_1'); sub encodings { @@ -378,6 +382,7 @@ sub loadEncoding last unless $type eq '#'; } $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table')); + #warn "Loading $file"; return $class->read($fh,$name,$type); } else @@ -407,13 +412,20 @@ sub getEncoding package Encode::Unicode; -# Dummy package that provides the encode interface +# Dummy package that provides the encode interface but leaves data +# as UTF-8 encoded. It is here so that from_to() works. sub name { 'Unicode' } -sub toUnicode { $_[1] } +sub toUnicode +{ + my ($obj,$str,$chk) = @_; + Encode::utf8_upgrade($str); + $_[1] = '' if $chk; + return $str; +} -sub fromUnicode { $_[1] } +*fromUnicode = \&toUnicode; package Encode::Table; @@ -532,7 +544,9 @@ sub fromUnicode return $str; } -package Encode::iso10646_1;# +package Encode::iso10646_1; +# Encoding is 16-bit network order Unicode +# Used for X font encodings sub name { 'iso10646-1' } @@ -546,6 +560,7 @@ sub toUnicode $uni .= chr($code); } $_[1] = $str if $chk; + Encode::utf8_upgrade($uni); return $uni; } @@ -568,6 +583,7 @@ sub fromUnicode return $str; } + package Encode::Escape; use Carp; diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index c231bbab6b..a7acd88e67 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,6 +1,11 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" +#define U8 U8 +#include "encode.h" +#include "iso8859.h" +#include "EBCDIC.h" +#include "Symbols.h" #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ Perl_croak(aTHX_ "panic_unimplemented"); \ @@ -9,8 +14,469 @@ UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) +#ifdef USE_PERLIO +/* Define an encoding "layer" in the perliol.h sense. + The layer defined here "inherits" in an object-oriented sense from the + "perlio" layer with its PerlIOBuf_* "methods". + The implementation is particularly efficient as until Encode settles down + there is no point in tryint to tune it. + + The layer works by overloading the "fill" and "flush" methods. + + "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API + to convert the encoded data to UTF-8 form, then copies it back to the + buffer. The "base class's" read methods then see the UTF-8 data. + + "flush" transforms the UTF-8 data deposited by the "base class's write + method in the buffer back into the encoded form using the encode OO perl API, + then copies data back into the buffer and calls "SUPER::flush. + + Note that "flush" is _also_ called for read mode - we still do the (back)-translate + so that the the base class's "flush" sees the correct number of encoded chars + for positioning the seek pointer. (This double translation is the worst performance + issue - particularly with all-perl encode engine.) + +*/ + + +#include "perliol.h" + +typedef struct +{ + PerlIOBuf base; /* PerlIOBuf stuff */ + SV * bufsv; + SV * enc; +} PerlIOEncode; + + +IV +PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + dTHX; + dSP; + IV code; + code = PerlIOBuf_pushed(f,mode,Nullch,0); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpv("Encode",0))); + XPUSHs(sv_2mortal(newSVpvn(arg,len))); + PUTBACK; + if (perl_call_method("getEncoding",G_SCALAR) != 1) + return -1; + SPAGAIN; + e->enc = POPs; + PUTBACK; + if (!SvROK(e->enc)) + return -1; + SvREFCNT_inc(e->enc); + FREETMPS; + LEAVE; + PerlIOBase(f)->flags |= PERLIO_F_UTF8; + return code; +} + +IV +PerlIOEncode_popped(PerlIO *f) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + dTHX; + if (e->enc) + { + SvREFCNT_dec(e->enc); + e->enc = Nullsv; + } + if (e->bufsv) + { + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + return 0; +} + +STDCHAR * +PerlIOEncode_get_base(PerlIO *f) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + dTHX; + if (!e->base.bufsiz) + e->base.bufsiz = 1024; + if (!e->bufsv) + { + e->bufsv = newSV(e->base.bufsiz); + sv_setpvn(e->bufsv,"",0); + } + e->base.buf = (STDCHAR *)SvPVX(e->bufsv); + if (!e->base.ptr) + e->base.ptr = e->base.buf; + if (!e->base.end) + e->base.end = e->base.buf; + if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) + { + Perl_warn(aTHX_ " ptr %p(%p)%p", + e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); + abort(); + } + if (SvLEN(e->bufsv) < e->base.bufsiz) + { + SSize_t poff = e->base.ptr - e->base.buf; + SSize_t eoff = e->base.end - e->base.buf; + e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); + e->base.ptr = e->base.buf + poff; + e->base.end = e->base.buf + eoff; + } + if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) + { + Perl_warn(aTHX_ " ptr %p(%p)%p", + e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); + abort(); + } + return e->base.buf; +} + +IV +PerlIOEncode_fill(PerlIO *f) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + dTHX; + dSP; + IV code; + code = PerlIOBuf_fill(f); + if (code == 0) + { + SV *uni; + STRLEN len; + char *s; + /* Set SV that is the buffer to be buf..ptr */ + SvCUR_set(e->bufsv, e->base.end - e->base.buf); + SvUTF8_off(e->bufsv); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + XPUSHs(e->bufsv); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("toUnicode",G_SCALAR) != 1) + code = -1; + SPAGAIN; + uni = POPs; + PUTBACK; + /* Now get translated string (forced to UTF-8) and copy back to buffer + don't use sv_setsv as that may "steal" PV from returned temp + and so free() our known-large-enough buffer. + sv_setpvn() should do but let us do it long hand. + */ + s = SvPVutf8(uni,len); + if (s != SvPVX(e->bufsv)) + { + e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); + Move(s,e->base.buf,len,char); + SvCUR_set(e->bufsv,len); + } + SvUTF8_on(e->bufsv); + e->base.end = e->base.buf+len; + e->base.ptr = e->base.buf; + FREETMPS; + LEAVE; + } + return code; +} + +IV +PerlIOEncode_flush(PerlIO *f) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + IV code = 0; + dTHX; + if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))) + { + dSP; + SV *str; + char *s; + STRLEN len; + SSize_t left = 0; + if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) + { + /* This is really just a flag to see if we took all the data, if + we did PerlIOBase_flush avoids a seek to lower layer. + Need to revisit if we start getting clever with unreads or seeks-in-buffer + */ + left = e->base.end - e->base.ptr; + } + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(e->enc); + SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); + SvUTF8_on(e->bufsv); + XPUSHs(e->bufsv); + XPUSHs(&PL_sv_yes); + PUTBACK; + if (perl_call_method("fromUnicode",G_SCALAR) != 1) + code = -1; + SPAGAIN; + str = POPs; + PUTBACK; + s = SvPV(str,len); + if (s != SvPVX(e->bufsv)) + { + e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); + Move(s,e->base.buf,len,char); + SvCUR_set(e->bufsv,len); + } + SvUTF8_off(e->bufsv); + e->base.ptr = e->base.buf+len; + /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ + e->base.end = e->base.ptr + left; + FREETMPS; + LEAVE; + if (PerlIOBuf_flush(f) != 0) + code = -1; + } + return code; +} + +IV +PerlIOEncode_close(PerlIO *f) +{ + PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); + IV code = PerlIOBase_close(f); + dTHX; + if (e->bufsv) + { + SvREFCNT_dec(e->bufsv); + e->bufsv = Nullsv; + } + e->base.buf = NULL; + e->base.ptr = NULL; + e->base.end = NULL; + PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); + return code; +} + +Off_t +PerlIOEncode_tell(PerlIO *f) +{ + PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); + /* Unfortunately the only way to get a postion is to back-translate, + the UTF8-bytes we have buf..ptr and adjust accordingly. + But we will try and save any unread data in case stream + is un-seekable. + */ + if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) + { + Size_t count = b->end - b->ptr; + PerlIO_push(f,&PerlIO_pending,"r",Nullch,0); + /* Save what we have left to read */ + PerlIOSelf(f,PerlIOBuf)->bufsiz = count; + PerlIO_unread(f,b->ptr,count); + /* There isn't any unread data - we just saved it - so avoid the lower seek */ + b->end = b->ptr; + /* Flush ourselves - now one layer down, + this does the back translate and adjusts position + */ + PerlIO_flush(PerlIONext(f)); + /* Set position of the saved data */ + PerlIOSelf(f,PerlIOBuf)->posn = b->posn; + } + else + { + PerlIO_flush(f); + } + return b->posn; +} + +PerlIO_funcs PerlIO_encode = { + "encoding", + sizeof(PerlIOEncode), + PERLIO_K_BUFFERED, + PerlIOBase_fileno, + PerlIOBuf_fdopen, + PerlIOBuf_open, + PerlIOBuf_reopen, + PerlIOEncode_pushed, + PerlIOEncode_popped, + PerlIOBuf_read, + PerlIOBuf_unread, + PerlIOBuf_write, + PerlIOBuf_seek, + PerlIOEncode_tell, + PerlIOEncode_close, + PerlIOEncode_flush, + PerlIOEncode_fill, + PerlIOBase_eof, + PerlIOBase_error, + PerlIOBase_clearerr, + PerlIOBuf_setlinebuf, + PerlIOEncode_get_base, + PerlIOBuf_bufsiz, + PerlIOBuf_get_ptr, + PerlIOBuf_get_cnt, + PerlIOBuf_set_ptrcnt, +}; +#endif + +void +Encode_Define(pTHX_ encode_t *enc) +{ + HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + hv_store(hash,enc->name,strlen(enc->name),sv,0); +} + void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} +static SV * +encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) +{ + STRLEN slen; + U8 *s = (U8 *) SvPV(src,slen); + SV *dst = sv_2mortal(newSV(2*slen+1)); + if (slen) + { + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); + STRLEN dlen = SvLEN(dst); + int code; + while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) + { + SvCUR_set(dst,dlen); + SvPOK_on(dst); + + if (code == ENCODE_FALLBACK) + break; + + switch(code) + { + case ENCODE_NOSPACE: + { + STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); + if (need <= SvLEN(dst)) + need += UTF8_MAXLEN; + d = (U8 *) SvGROW(dst, need); + dlen = SvLEN(dst); + slen = SvCUR(src); + break; + } + + case ENCODE_NOREP: + if (dir == enc->f_utf8) + { + if (!check && ckWARN_d(WARN_UTF8)) + { + STRLEN clen; + UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0); + Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%x}\" does not map to %s", ch, enc->name); + /* FIXME: Skip over the character, copy in replacement and continue + * but that is messy so for now just fail. + */ + return &PL_sv_undef; + } + else + { + return &PL_sv_undef; + } + } + else + { + /* UTF-8 is supposed to be "Universal" so should not happen */ + Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", + enc->name, (SvCUR(src)-slen),s+slen); + } + break; + + case ENCODE_PARTIAL: + if (!check && ckWARN_d(WARN_UTF8)) + { + Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", + (dir == enc->f_utf8) ? "UTF-8" : enc->name); + } + return &PL_sv_undef; + + default: + Perl_croak(aTHX_ "Unexpected code %d converting %s %s", + code, (dir == enc->f_utf8) ? "to" : "from",enc->name); + return &PL_sv_undef; + } + } + SvCUR_set(dst,dlen); + SvPOK_on(dst); + if (check) + { + if (slen < SvCUR(src)) + { + Move(s+slen,s,SvCUR(src)-slen,U8); + } + SvCUR_set(src,SvCUR(src)-slen); + } + } + return dst; +} + +MODULE = Encode PACKAGE = Encode PREFIX = sv_ + +void +valid_utf8(sv) +SV * sv +CODE: + { + STRLEN len; + char *s = SvPV(sv,len); + if (!SvUTF8(sv) || is_utf8_string((U8*)s,len)) + XSRETURN_YES; + else + XSRETURN_NO; + } + +void +sv_utf8_encode(sv) +SV * sv + +bool +sv_utf8_decode(sv) +SV * sv + +void +sv_utf8_upgrade(sv) +SV * sv + +bool +sv_utf8_downgrade(sv,failok=0) +SV * sv +bool failok + +MODULE = Encode PACKAGE = Encode::XS PREFIX = Encode_ + +PROTOTYPES: ENABLE + +void +Encode_toUnicode(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: + { + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); + SvUTF8_on(ST(0)); + XSRETURN(1); + } + +void +Encode_fromUnicode(obj,src,check = 0) +SV * obj +SV * src +int check +CODE: + { + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + sv_utf8_upgrade(src); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); + XSRETURN(1); + } + MODULE = Encode PACKAGE = Encode PROTOTYPES: ENABLE @@ -182,7 +648,7 @@ _is_utf8(sv, ...) { SV * check = items == 2 ? ST(1) : Nullsv; if (SvPOK(sv)) { - RETVAL = SvUTF8(sv); + RETVAL = SvUTF8(sv) ? 1 : 0; if (RETVAL && SvTRUE(check) && !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) @@ -239,3 +705,12 @@ _utf_to_utf(sv, from, to, ...) OUTPUT: RETVAL +BOOT: +{ +#ifdef USE_PERLIO + PerlIO_define_layer(&PerlIO_encode); +#endif +#include "iso8859.def" +#include "EBCDIC.def" +#include "Symbols.def" +} diff --git a/ext/Encode/Encode/EncodeFormat.pod b/ext/Encode/Encode/EncodeFormat.pod new file mode 100644 index 0000000000..d83b12838d --- /dev/null +++ b/ext/Encode/Encode/EncodeFormat.pod @@ -0,0 +1,164 @@ +=head1 NAME + +EncodeFormat - the format of encoding tables of the Encode extension + +=head1 DESCRIPTION + +I<The format used in the encoding tables of the Encode extension has +been borrowed from Tcl, as has the following documentation been borrowed +from the same. The documentation has been reformatted as Perl pod.> + +Space would prohibit precompiling into Tcl every possible encoding +algorithm, so many encodings are stored on disk as dynamically-loadable +encoding files. This behavior also allows the user to create additional +encoding files that can be loaded using the same mechanism. These +encoding files contain information about the tables and/or escape +sequences used to map between an external encoding and Unicode. The +external encoding may consist of single-byte, multi-byte, or double-byte +characters. + +Each dynamically-loadable encoding is represented as a text file. The +initial line of the file, beginning with a ``#'' symbol, is a comment +that provides a human-readable description of the file. The next line +identifies the type of encoding file. It can be one of the following +letters: + +=over 4 + +=item [1] B<S> + +A single-byte encoding, where one character is always one byte long in +the encoding. An example is B<iso8859-1>, used by many European languages. + +=item [2] B<D> + +A double-byte encoding, where one character is always two bytes long in the +encoding. An example is B<big5>, used for Chinese text. + +=item [3] B<M> + +A multi-byte encoding, where one character may be either one or two +bytes long. Certain bytes are a lead bytes, indicating that another +byte must follow and that together the two bytes represent one +character. Other bytes are not lead bytes and represent themselves. +An example is B<shiftjis>, used by many Japanese computers. + +=item [4] B<E> + +An escape-sequence encoding, specifying that certain sequences of +bytes do not represent characters, but commands that describe how +following bytes should be interpreted. + +=back + +The rest of the lines in the file depend on the type. + +Cases [1], [2], and [3] are collectively referred to as table-based +encoding files. The lines in a table-based encoding file are in the +same format as this example taken from the B<shiftjis> encoding (this +is not the complete file): + + # Encoding file: shiftjis, multi-byte + M + 003F 0 40 + 00 + 0000000100020003000400050006000700080009000A000B000C000D000E000F + 0010001100120013001400150016001700180019001A001B001C001D001E001F + 0020002100220023002400250026002700280029002A002B002C002D002E002F + 0030003100320033003400350036003700380039003A003B003C003D003E003F + 0040004100420043004400450046004700480049004A004B004C004D004E004F + 0050005100520053005400550056005700580059005A005B005C005D005E005F + 0060006100620063006400650066006700680069006A006B006C006D006E006F + 0070007100720073007400750076007700780079007A007B007C007D203E007F + 0080000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000 + 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F + FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F + FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F + FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F + 0000000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000 + 81 + 0000000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000 + 0000000000000000000000000000000000000000000000000000000000000000 + 300030013002FF0CFF0E30FBFF1AFF1BFF1FFF01309B309C00B4FF4000A8FF3E + FFE3FF3F30FD30FE309D309E30034EDD30053006300730FC20152010FF0F005C + 301C2016FF5C2026202520182019201C201DFF08FF0930143015FF3BFF3DFF5B + FF5D30083009300A300B300C300D300E300F30103011FF0B221200B100D70000 + 00F7FF1D2260FF1CFF1E22662267221E22342642264000B0203220332103FFE5 + FF0400A200A3FF05FF03FF06FF0AFF2000A72606260525CB25CF25CE25C725C6 + 25A125A025B325B225BD25BC203B301221922190219121933013000000000000 + 000000000000000000000000000000002208220B2286228722822283222A2229 + 000000000000000000000000000000002227222800AC21D221D4220022030000 + 0000000000000000000000000000000000000000222022A52312220222072261 + 2252226A226B221A223D221D2235222B222C0000000000000000000000000000 + 212B2030266F266D266A2020202100B6000000000000000025EF000000000000 + +The third line of the file is three numbers. The first number is the +fallback character (in base 16) to use when converting from UTF-8 to +this encoding. The second number is a B<1> if this file represents +the encoding for a symbol font, or B<0> otherwise. The last number +(in base 10) is how many pages of data follow. + +Subsequent lines in the example above are pages that describe how to +map from the encoding into 2-byte Unicode. The first line in a page +identifies the page number. Following it are 256 double-byte numbers, +arranged as 16 rows of 16 numbers. Given a character in the encoding, +the high byte of that character is used to select which page, and the +low byte of that character is used as an index to select one of the +double-byte numbers in that page - the value obtained being the +corresponding Unicode character. By examination of the example above, +one can see that the characters 0x7E and 0x8163 in B<shiftjis> map to +203E and 2026 in Unicode, respectively. + +Following the first page will be all the other pages, each in the same +format as the first: one number identifying the page followed by 256 +double-byte Unicode characters. If a character in the encoding maps +to the Unicode character 0000, it means that the character doesn't +actually exist. If all characters on a page would map to 0000, that +page can be omitted. + +Case [4] is the escape-sequence encoding file. The lines in an this +type of file are in the same format as this example taken from the +B<iso2022-jp> encoding: + + # Encoding file: iso2022-jp, escape-driven + E + init {} + final {} + iso8859-1 \\x1b(B + jis0201 \\x1b(J + jis0208 \\x1b$@ + jis0208 \\x1b$B + jis0212 \\x1b$(D + gb2312 \\x1b$A + ksc5601 \\x1b$(C + +In the file, the first column represents an option and the second +column is the associated value. B<init> is a string to emit or expect +before the first character is converted, while B<final> is a string to +emit or expect after the last character. All other options are names +of table-based encodings; the associated value is the escape-sequence +that marks that encoding. Tcl syntax is used for the values; in the +above example, for instance, ``B<{}>'' represents the empty string and +``B<\\x1b>'' represents character 27. + +B<Completely Tcl-specific paragraph, ignore in the context of Perl> +When B<Tcl_GetEncoding> encounters an encoding I<name> that has not +been loaded, it attempts to load an encoding file called +I<name>B<.enc> from the B<encoding> subdirectory of each directory +specified in the library path B<$tcl_libPath>. If the encoding file +exists, but is malformed, an error message will be left in I<interp>. + +=head1 KEYWORDS + +utf, encoding, convert + +=head1 COPYRIGHT + + # Copyright (c) 1997-1998 Sun Microsystems, Inc. + # See the file "license.terms" for information on usage and redistribution + # of this file, and for a DISCLAIMER OF ALL WARRANTIES. + # RCS: @(#) $Id: Encoding.3,v 1.7 1999/10/13 00:32:05 hobbs Exp $ diff --git a/ext/Encode/Encode/ascii.enc b/ext/Encode/Encode/ascii.enc index e0320b8c58..284a9f51d2 100644 --- a/ext/Encode/Encode/ascii.enc +++ b/ext/Encode/Encode/ascii.enc @@ -9,7 +9,7 @@ S 0040004100420043004400450046004700480049004A004B004C004D004E004F 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F -0070007100720073007400750076007700780079007A007B007C007D007E0000 +0070007100720073007400750076007700780079007A007B007C007D007E007F 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/ext/Encode/Encode/cp1006.enc b/ext/Encode/Encode/cp1006.enc new file mode 100644 index 0000000000..3ba00dd7e4 --- /dev/null +++ b/ext/Encode/Encode/cp1006.enc @@ -0,0 +1,20 @@ +# Encoding file: cp1006, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A006F006F106F206F306F406F506F606F706F806F9060C061B00AD061FFE81 +FE8DFE8EFE8EFE8FFE91FB56FB58FE93FE95FE97FB66FB68FE99FE9BFE9DFE9F +FB7AFB7CFEA1FEA3FEA5FEA7FEA9FB84FEABFEADFB8CFEAFFB8AFEB1FEB3FEB5 +FEB7FEB9FEBBFEBDFEBFFEC1FEC5FEC9FECAFECBFECCFECDFECEFECFFED0FED1 +FED3FED5FED7FED9FEDBFB92FB94FEDDFEDFFEE0FEE1FEE3FB9EFEE5FEE7FE85 +FEEDFBA6FBA8FBA9FBAAFE80FE89FE8AFE8BFEF1FEF2FEF3FBB0FBAEFE7CFE7D diff --git a/ext/Encode/Encode/cp1047.enc b/ext/Encode/Encode/cp1047.enc new file mode 100644 index 0000000000..8956fa4558 --- /dev/null +++ b/ext/Encode/Encode/cp1047.enc @@ -0,0 +1,20 @@ +# Encoding file: cp1047 (EBCDIC), single-byte +S +006F 0 1 +00 +0000000100020003009C00090086007F0097008D008E000B000C000D000E000F +0010001100120013009D000A00080087001800190092008F001C001D001E001F +0080008100820083008400850017001B00880089008A008B008C000500060007 +0090009100160093009400950096000400980099009A009B00140015009E001A +002000A000E200E400E000E100E300E500E700F100A2002E003C0028002B007C +002600E900EA00EB00E800ED00EE00EF00EC00DF00210024002A0029003B005E +002D002F00C200C400C000C100C300C500C700D100A6002C0025005F003E003F +00F800C900CA00CB00C800CD00CE00CF00CC0060003A002300400027003D0022 +00D800610062006300640065006600670068006900AB00BB00F000FD00FE00B1 +00B0006A006B006C006D006E006F00700071007200AA00BA00E600B800C600A4 +00B5007E0073007400750076007700780079007A00A100BF00D0005B00DE00AE +00AC00A300A500B700A900A700B600BC00BD00BE00DD00A800AF005D00B400D7 +007B00410042004300440045004600470048004900AD00F400F600F200F300F5 +007D004A004B004C004D004E004F00500051005200B900FB00FC00F900FA00FF +005C00F70053005400550056005700580059005A00B200D400D600D200D300D5 +003000310032003300340035003600370038003900B300DB00DC00D900DA009F diff --git a/ext/Encode/Encode/cp37.enc b/ext/Encode/Encode/cp37.enc new file mode 100644 index 0000000000..94d8c335bf --- /dev/null +++ b/ext/Encode/Encode/cp37.enc @@ -0,0 +1,20 @@ +# Encoding file: cp37 (EBCDIC), single-byte +S +006F 0 1 +00 +0000000100020003009C00090086007F0097008D008E000B000C000D000E000F +0010001100120013009D008500080087001800190092008F001C001D001E001F +00800081008200830084000A0017001B00880089008A008B008C000500060007 +0090009100160093009400950096000400980099009A009B00140015009E001A +002000A000E200E400E000E100E300E500E700F100A2002E003C0028002B007C +002600E900EA00EB00E800ED00EE00EF00EC00DF00210024002A0029003B00AC +002D002F00C200C400C000C100C300C500C700D100A6002C0025005F003E003F +00F800C900CA00CB00C800CD00CE00CF00CC0060003A002300400027003D0022 +00D800610062006300640065006600670068006900AB00BB00F000FD00FE00B1 +00B0006A006B006C006D006E006F00700071007200AA00BA00E600B800C600A4 +00B5007E0073007400750076007700780079007A00A100BF00D000DD00DE00AE +005E00A300A500B700A900A700B600BC00BD00BE005B005D00AF00A800B400D7 +007B00410042004300440045004600470048004900AD00F400F600F200F300F5 +007D004A004B004C004D004E004F00500051005200B900FB00FC00F900FA00FF +005C00F70053005400550056005700580059005A00B200D400D600D200D300D5 +003000310032003300340035003600370038003900B300DB00DC00D900DA009F diff --git a/ext/Encode/Encode/cp424.enc b/ext/Encode/Encode/cp424.enc new file mode 100644 index 0000000000..3b0c23ec7a --- /dev/null +++ b/ext/Encode/Encode/cp424.enc @@ -0,0 +1,20 @@ +# Encoding file: cp424, single-byte +S +003F 0 1 +00 +0000000100020003009C00090086007F0097008D008E000B000C000D000E000F +0010001100120013009D008500080087001800190092008F001C001D001E001F +00800081008200830084000A0017001B00880089008A008B008C000500060007 +0090009100160093009400950096000400980099009A009B00140015009E001A +002005D005D105D205D305D405D505D605D705D800A2002E003C0028002B007C +002605D905DA05DB05DC05DD05DE05DF05E005E100210024002A0029003B00AC +002D002F05E205E305E405E505E605E705E805E900A6002C0025005F003E003F +000005EA0000000000A000000000000020170060003A002300400027003D0022 +000000610062006300640065006600670068006900AB00BB00000000000000B1 +00B0006A006B006C006D006E006F00700071007200000000000000B8000000A4 +00B5007E0073007400750076007700780079007A0000000000000000000000AE +005E00A300A500B700A900A700B600BC00BD00BE005B005D00AF00A800B400D7 +007B00410042004300440045004600470048004900AD00000000000000000000 +007D004A004B004C004D004E004F00500051005200B900000000000000000000 +005C00F70053005400550056005700580059005A00B200000000000000000000 +003000310032003300340035003600370038003900B30000000000000000009F diff --git a/ext/Encode/Encode/cp856.enc b/ext/Encode/Encode/cp856.enc new file mode 100644 index 0000000000..cab493c60a --- /dev/null +++ b/ext/Encode/Encode/cp856.enc @@ -0,0 +1,20 @@ +# Encoding file: cp856, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +05D005D105D205D305D405D505D605D705D805D905DA05DB05DC05DD05DE05DF +05E005E105E205E305E405E505E605E705E805E905EA000000A3000000D70000 +00000000000000000000000000000000000000AE00AC00BD00BC000000AB00BB +2591259225932502252400000000000000A9256325512557255D00A200A52510 +25142534252C251C2500253C00000000255A25542569256625602550256C00A4 +0000000000000000000000000000000000002518250C2588258400A600002580 +00000000000000000000000000B5000000000000000000000000000000AF00B4 +00AD00B1201700BE00B600A700F700B800B000A800B700B900B300B225A000A0 diff --git a/ext/Encode/Encode/gsm0338.enc b/ext/Encode/Encode/gsm0338.enc new file mode 100644 index 0000000000..bf09e70e8b --- /dev/null +++ b/ext/Encode/Encode/gsm0338.enc @@ -0,0 +1,20 @@ +# Encoding file: GSM 03.38, single-byte +S +003F 0 1 +00 +004000A3002400A500E800E900F900EC00F200E7000A00D800F8000D00C500E5 +0394005F03A60393039B03A903A003A803A30398039E00A000C600E600DF00C9 +002000210022002300A400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +00A1004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A00C400D600D100DC00A7 +00BF006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A00E400F600F100FC00E0 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 diff --git a/ext/Encode/Encode/iso8859-10.enc b/ext/Encode/Encode/iso8859-10.enc new file mode 100644 index 0000000000..934b3b920b --- /dev/null +++ b/ext/Encode/Encode/iso8859-10.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-10, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A0010401120122012A0128013600A7013B011001600166017D00AD016A014A +00B0010501130123012B0129013700B7013C011101610167017E2015016B014B +010000C100C200C300C400C500C6012E010C00C9011800CB011600CD00CE00CF +00D00145014C00D300D400D500D6016800D8017200DA00DB00DC00DD00DE00DF +010100E100E200E300E400E500E6012F010D00E9011900EB011700ED00EE00EF +00F00146014D00F300F400F500F6016900F8017300FA00FB00FC00FD00FE0138 diff --git a/ext/Encode/Encode/iso8859-13.enc b/ext/Encode/Encode/iso8859-13.enc new file mode 100644 index 0000000000..b7edcaf38f --- /dev/null +++ b/ext/Encode/Encode/iso8859-13.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-13, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A0201D00A200A300A4201E00A600A700D800A9015600AB00AC00AD00AE00C6 +00B000B100B200B3201C00B500B600B700F800B9015700BB00BC00BD00BE00E6 +0104012E0100010600C400C501180112010C00C90179011601220136012A013B +01600143014500D3014C00D500D600D701720141015A016A00DC017B017D00DF +0105012F0101010700E400E501190113010D00E9017A011701230137012B013C +01610144014600F3014D00F500F600F701730142015B016B00FC017C017E2019 diff --git a/ext/Encode/Encode/iso8859-14.enc b/ext/Encode/Encode/iso8859-14.enc new file mode 100644 index 0000000000..a65ba05b9a --- /dev/null +++ b/ext/Encode/Encode/iso8859-14.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-14, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A01E021E0300A3010A010B1E0A00A71E8000A91E821E0B1EF200AD00AE0178 +1E1E1E1F012001211E401E4100B61E561E811E571E831E601EF31E841E851E61 +00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF +017400D100D200D300D400D500D61E6A00D800D900DA00DB00DC00DD017600DF +00E000E100E200E300E400E500E600E700E800E900EA00EB00EC00ED00EE00EF +017500F100F200F300F400F500F61E6B00F800F900FA00FB00FC00FD017700FF diff --git a/ext/Encode/Encode/iso8859-15.enc b/ext/Encode/Encode/iso8859-15.enc new file mode 100644 index 0000000000..823af466e5 --- /dev/null +++ b/ext/Encode/Encode/iso8859-15.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-15, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A000A100A200A320AC00A5016000A7016100A900AA00AB00AC00AD00AE00AF +00B000B100B200B3017D00B500B600B7017E00B900BA00BB01520153017800BF +00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF +00D000D100D200D300D400D500D600D700D800D900DA00DB00DC00DD00DE00DF +00E000E100E200E300E400E500E600E700E800E900EA00EB00EC00ED00EE00EF +00F000F100F200F300F400F500F600F700F800F900FA00FB00FC00FD00FE00FF diff --git a/ext/Encode/Encode/iso8859-16.enc b/ext/Encode/Encode/iso8859-16.enc new file mode 100644 index 0000000000..1936b97b6f --- /dev/null +++ b/ext/Encode/Encode/iso8859-16.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-16, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A001040105014120AC00AB016000A7016100A90218201E017900AD017A017B +00B000B1010C0142017D201D00B600B7017E010D021900BB015201530178017C +00C000C100C2010200C4010600C600C700C800C900CA00CB00CC00CD00CE00CF +0110014300D200D300D4015000D6015A017000D900DA00DB00DC0118021A00DF +00E000E100E2010300E4010700E600E700E800E900EA00EB00EC00ED00EE00EF +0111014400F200F300F4015100F6015B017100F900FA00FB00FC0119021B00FF diff --git a/ext/Encode/Encode/posix-bc.enc b/ext/Encode/Encode/posix-bc.enc new file mode 100644 index 0000000000..8b533a4933 --- /dev/null +++ b/ext/Encode/Encode/posix-bc.enc @@ -0,0 +1,20 @@ +# Encoding file: posix-bc (EBCDIC), single-byte +S +006F 0 1 +00 +0000000100020003009C00090086007F0097008D008E000B000C000D000E000F +0010001100120013009D000A00080087001800190092008F001C001D001E001F +0080008100820083008400850017001B00880089008A008B008C000500060007 +0090009100160093009400950096000400980099009A009B00140015009E001A +002000A000E200E400E000E100E300E500E700F10060002E003C0028002B007C +002600E900EA00EB00E800ED00EE00EF00EC00DF00210024002A0029003B009F +002D002F00C200C400C000C100C300C500C700D1005E002C0025005F003E003F +00F800C900CA00CB00C800CD00CE00CF00CC00A8003A002300400027003D0022 +00D800610062006300640065006600670068006900AB00BB00F000FD00FE00B1 +00B0006A006B006C006D006E006F00700071007200AA00BA00E600B800C600A4 +00B500AF0073007400750076007700780079007A00A100BF00D000DD00DE00AE +00A200A300A500B700A900A700B600BC00BD00BE00AC005B005C005D00B400D7 +00F900410042004300440045004600470048004900AD00F400F600F200F300F5 +00A6004A004B004C004D004E004F00500051005200B900FB00FC00DB00FA00FF +00D900F70053005400550056005700580059005A00B200D400D600D200D300D5 +003000310032003300340035003600370038003900B3007B00DC007D00DA007E diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 329937e0e2..4b1ec95ad2 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -1,7 +1,25 @@ use ExtUtils::MakeMaker; + +my %tables = (iso8859 => ['ascii.enc', 'cp1250.enc'], + EBCDIC => ['cp1047.enc','cp37.enc','posix-bc.enc'], + Symbols => ['symbol.enc','dingbats.enc'], + ); + +opendir(ENC,'Encode'); +while (defined(my $file = readdir(ENC))) + { + if ($file =~ /iso8859.*\.enc/) + { + push(@{$tables{iso8859}},$file); + } + } +closedir(ENC); + + WriteMakefile( NAME => "Encode", VERSION_FROM => 'Encode.pm', + OBJECT => '$(O_FILES)', 'dist' => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', @@ -9,3 +27,84 @@ WriteMakefile( }, MAN3PODS => {}, ); + +package MY; + + +sub post_initialize +{ + my ($self) = @_; + my %o; + # Find existing O_FILES + foreach my $f (@{$self->{'O_FILES'}}) + { + $o{$f} = 1; + } + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + # Reset the variable + $self->{'O_FILES'} = [sort keys %o]; + my @files; + foreach my $table (keys %tables) + { + foreach my $ext (qw($(OBJ_EXT) .c .h .def)) + { + push (@files,$table.$ext); + } + } + $self->{'clean'}{'FILES'} .= join(' ',@files); + return ''; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->curdir,'Encode'); + my $str = "# Encode$(OBJ_EXT) depends on .h and .def files not .c files - but all written by compile\n"; + $str .= 'Encode$(OBJ_EXT) :'; + my @rules; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : compile Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $numlines = 1; + $lengthsofar = length($str); + $continuator = ''; + $str .= "\n\t\$(PERL) compile \$\@"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= "\n\t\$(PERL) compile \$\@"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= "\n\n"; + } + return $str; +} diff --git a/ext/Encode/compile b/ext/Encode/compile new file mode 100755 index 0000000000..b890a04d81 --- /dev/null +++ b/ext/Encode/compile @@ -0,0 +1,530 @@ +#!../../perl -w +BEGIN { @INC = '../../lib' }; +use strict; + +sub encode_U +{ + # UTF-8 encode long hand - only covers part of perl's range + my $uv = shift; + if ($uv < 0x80) + { + return chr($uv) + } + if ($uv < 0x800) + { + return chr(($uv >> 6) | 0xC0). + chr(($uv & 0x3F) | 0x80); + } + return chr(($uv >> 12) | 0xE0). + chr((($uv >> 6) & 0x3F) | 0x80). + chr(($uv & 0x3F) | 0x80); +} + +sub encode_S +{ + # encode single byte + my ($ch,$page) = @_; + return chr($ch); +} + +sub encode_D +{ + # encode double byte MS byte first + my ($ch,$page) = @_; + return chr($page).chr($ch); +} + +sub encode_M +{ + # encode Multi-byte - single for 0..255 otherwise double + my ($ch,$page) = @_; + return &encode_D if $page; + return &encode_S; +} + +# Win32 does not expand globs on command line +eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32'); + +my $cname = shift(@ARGV); +chmod(0666,$cname) if -f $cname && !-w $cname; +open(C,">$cname") || die "Cannot open $cname:$!"; +my $dname = $cname; +$dname =~ s/(\.[^\.]*)?$/.def/; + +my ($doC,$doEnc,$doUcm); + +if ($cname =~ /\.(c|xs)$/) + { + $doC = 1; + chmod(0666,$dname) if -f $cname && !-w $dname; + open(D,">$dname") || die "Cannot open $dname:$!"; + my $hname = $cname; + $hname =~ s/(\.[^\.]*)?$/.h/; + chmod(0666,$hname) if -f $cname && !-w $hname; + open(H,">$hname") || die "Cannot open $hname:$!"; + + foreach my $fh (\*C,\*D,\*H) + { + print $fh <<"END"; +/* + !!!!!!! DO NOT EDIT THIS FILE !!!!!!! + This file was autogenerated by: + $^X $0 $cname @ARGV +*/ +END + } + + if ($cname =~ /(\w+)\.xs$/) + { + print C "#include <EXTERN.h>\n"; + print C "#include <perl.h>\n"; + print C "#include <XSUB.h>\n"; + print C "#define U8 U8\n"; + } + print C "#include \"encode.h\"\n"; + } +elsif ($cname =~ /\.enc$/) + { + $doEnc = 1; + } +elsif ($cname =~ /\.ucm$/) + { + $doUcm = 1; + } + +my %encoding; +my %strings; + +sub cmp_name +{ + if ($a =~ /^.*-(\d+)/) + { + my $an = $1; + if ($b =~ /^.*-(\d+)/) + { + my $r = $an <=> $1; + return $r if $r; + } + } + return $a cmp $b; +} + +foreach my $enc (sort cmp_name @ARGV) + { + my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/; + if (open(E,$enc)) + { + if ($sfx eq 'enc') + { + compile_enc(\*E,lc($name),\*C); + } + else + { + compile_ucm(\*E,lc($name),\*C); + } + } + else + { + warn "Cannot open $enc for $name:$!"; + } + } + +if ($doC) + { + foreach my $enc (sort cmp_name keys %encoding) + { + my $sym = "${enc}_encoding"; + $sym =~ s/\W+/_/g; + print C "encode_t $sym = \n"; + print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n\n"; + } + + foreach my $enc (sort cmp_name keys %encoding) + { + my $sym = "${enc}_encoding"; + $sym =~ s/\W+/_/g; + print H "extern encode_t $sym;\n"; + print D " Encode_Define(aTHX_ &$sym);\n"; + } + + if ($cname =~ /(\w+)\.xs$/) + { + my $mod = $1; + print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n"; + print C "BOOT:\n{\n"; + print C "#include \"$dname\"\n"; + print C "}\n"; + } + close(D); + close(H); + } +close(C); + + +sub compile_ucm +{ + my ($fh,$name,$ch) = @_; + my $e2u = {}; + my $u2e = {}; + my $cs; + my %attr; + while (<$fh>) + { + s/#.*$//; + last if /^\s*CHARMAP\s*$/i; + if (/^\s*<(\w+)>\s+"?([^"]*)"?\s*$/i) + { + $attr{$1} = $2; + } + } + if (!defined($cs = $attr{'code_set_name'})) + { + warn "No <code_set_name> in $name\n"; + } + else + { + # $name = lc($cs); + } + my $erep; + my $urep; + if (exists $attr{'subchar'}) + { + my @byte = $attr{'subchar'} =~ /^\s*(?:\\x([0-9a-f]+))+\s*$/; + $erep = join('',map(hex($_),@byte)); + } + warn "Scanning $name ($cs)\n"; + my $nfb = 0; + my $hfb = 0; + while (<$fh>) + { + s/#.*$//; + last if /^\s*END\s+CHARMAP\s*$/i; + next if /^\s*$/; + my ($u,@byte) = /^<U([0-9a-f]+)>\s+(?:\\x([0-9a-f]+))+\s*(\|[0-3]|)\s*$/i; + my $fb = pop(@byte); + if (defined($u)) + { + my $uch = encode_U(hex($u)); + my $ech = join('',map(chr(hex($_)),@byte)); + if (length($fb)) + { + $fb = substr($fb,1); + $hfb++; + } + else + { + $nfb++; + $fb = '0'; + } + # $fb is fallback flag + # 0 - round trip safe + # 1 - fallback for unicode -> enc + # 2 - skip sub-char mapping + # 3 - fallback enc -> unicode + enter($u2e,$uch,$ech,$u2e,$fb+0) if ($fb =~ /[01]/); + enter($e2u,$ech,$uch,$e2u,$fb+0) if ($fb =~ /[03]/); + } + else + { + warn $_; + } + + } + if ($nfb && $hfb) + { + die "$nfb entries without fallback, $hfb entries with\n"; + } + if ($doC) + { + output($ch,$name.'_utf8',$e2u); + output($ch,'utf8_'.$name,$u2e); + $encoding{$name} = [$e2u->{Cname},$u2e->{Cname}, + outstring($ch,$e2u->{Cname}.'_def',$erep),length($erep)]; + } + elsif ($doEnc) + { + output_enc($ch,$name,$e2u); + } + elsif ($doUcm) + { + output_ucm($ch,$name,$u2e); + } +} + +sub compile_enc +{ + my ($fh,$name,$ch) = @_; + my $e2u = {}; + my $u2e = {}; + + my $type; + while ($type = <$fh>) + { + last if $type !~ /^\s*#/; + } + chomp($type); + return if $type eq 'E'; + my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); + warn "$type encoded $name\n"; + my $rep = ''; + { + my $v = hex($def); + no strict 'refs'; + $rep = &{"encode_$type"}($v & 0xFF, ($v >> 8) & 0xffe); + } + while ($pages--) + { + my $line = <$fh>; + chomp($line); + my $page = hex($line); + my $ch = 0; + for (my $i = 0; $i < 16; $i++) + { + my $line = <$fh>; + for (my $j = 0; $j < 16; $j++) + { + no strict 'refs'; + my $ech = &{"encode_$type"}($ch,$page); + my $val = hex(substr($line,0,4,'')); + if ($val || (!$ch && !$page)) + { + my $uch = encode_U($val); + enter($e2u,$ech,$uch,$e2u,0); + enter($u2e,$uch,$ech,$u2e,0); + } + else + { + # No character at this position + # enter($e2u,$ech,undef,$e2u); + } + $ch++; + } + } + } + if ($doC) + { + output($ch,$name.'_utf8',$e2u); + output($ch,'utf8_'.$name,$u2e); + $encoding{$name} = [$e2u->{Cname},$u2e->{Cname}, + outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)]; + } + elsif ($doEnc) + { + output_enc($ch,$name,$e2u); + } + elsif ($doUcm) + { + output_ucm($ch,$name,$u2e); + } +} + +sub enter +{ + my ($a,$s,$d,$t,$fb) = @_; + $t = $a if @_ < 4; + my $b = substr($s,0,1); + my $e = $a->{$b}; + unless ($e) + { # 0 1 2 3 4 5 + $e = [$b,$b,'',{},length($s),0,$fb]; + $a->{$b} = $e; + } + if (length($s) > 1) + { + enter($e->[3],substr($s,1),$d,$t,$fb); + } + else + { + $e->[2] = $d; + $e->[3] = $t; + $e->[5] = length($d); + } +} + +sub outstring +{ + my ($fh,$name,$s) = @_; + my $sym = $strings{$s}; + unless ($sym) + { + foreach my $o (keys %strings) + { + my $i = index($o,$s); + if ($i >= 0) + { + $sym = $strings{$o}; + $sym .= sprintf("+0x%02x",$i) if ($i); + return $sym; + } + } + $strings{$s} = $sym = $name; + printf $fh "\nstatic const U8 %s[%d] =\n",$name,length($s); + # Do in chunks of 16 chars to constrain line length + # Assumes ANSI C adjacent string litteral concatenation + while (length($s)) + { + my $c = substr($s,0,16,''); + print $fh '"',join('',map(sprintf('\x%02x',ord($_)),split(//,$c))),'"'; + print $fh "\n" if length($s); + } + printf $fh ";\n"; + } + return $sym; +} + +sub process +{ + my ($name,$a) = @_; + $name =~ s/\W+/_/g; + $a->{Cname} = $name; + my @keys = grep(ref($a->{$_}),sort keys %$a); + my $l; + my @ent; + foreach my $b (@keys) + { + my ($s,$f,$out,$t,$end) = @{$a->{$b}}; + if (defined($l) && + ord($b) == ord($a->{$l}[1])+1 && + $a->{$l}[3] == $a->{$b}[3] && + $a->{$l}[4] == $a->{$b}[4] && + $a->{$l}[5] == $a->{$b}[5] && + $a->{$l}[6] == $a->{$b}[6] + # && length($a->{$l}[2]) < 16 + ) + { + my $i = ord($b)-ord($a->{$l}[0]); + $a->{$l}[1] = $b; + $a->{$l}[2] .= $a->{$b}[2]; + } + else + { + $l = $b; + push(@ent,$b); + } + if (exists $t->{Cname}) + { + $t->{'Forward'} = 1 if $t != $a; + } + else + { + process(sprintf("%s_%02x",$name,ord($s)),$t); + } + } + if (ord($keys[-1]) < 255) + { + my $t = chr(ord($keys[-1])+1); + $a->{$t} = [$t,chr(255),undef,$a,0,0]; + push(@ent,$t); + } + $a->{'Entries'} = \@ent; +} + +sub outtable +{ + my ($fh,$a) = @_; + my $name = $a->{'Cname'}; + # String tables + foreach my $b (@{$a->{'Entries'}}) + { + next unless $a->{$b}[5]; + my $s = ord($a->{$b}[0]); + my $e = ord($a->{$b}[1]); + outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$a->{$b}[2]); + } + if ($a->{'Forward'}) + { + print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; + } + $a->{'Done'} = 1; + foreach my $b (@{$a->{'Entries'}}) + { + my ($s,$e,$out,$t,$end,$l) = @{$a->{$b}}; + outtable($fh,$t) unless $t->{'Done'}; + } + print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n"; + foreach my $b (@{$a->{'Entries'}}) + { + my ($s,$e,$out,$t,$end,$l,$fb) = @{$a->{$b}}; + my $sc = ord($s); + my $ec = ord($e); + $end |= 0x80 if $fb; + print $fh "{"; + if ($l) + { + printf $fh outstring($fh,'',$out); + } + else + { + print $fh "0"; + } + print $fh ",",$t->{Cname}; + printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec; + } + print $fh "};\n"; +} + +sub output +{ + my ($fh,$name,$a) = @_; + process($name,$a); + # Sub-tables + outtable($fh,$a); +} + +sub output_enc +{ + my ($fh,$name,$a) = @_; + foreach my $b (sort keys %$a) + { + my ($s,$e,$out,$t,$end,$l,$fb) = @{$a->{$b}}; + } +} + +sub decode_U +{ + my $s = shift; + +} + + +sub output_ucm_page +{ + my ($fh,$a,$t,$pre) = @_; + # warn sprintf("Page %x\n",$pre); + foreach my $b (sort keys %$t) + { + my ($s,$e,$out,$n,$end,$l,$fb) = @{$t->{$b}}; + die "oops $s $e" unless $s eq $e; + my $u = ord($s); + if ($n != $a && $n != $t) + { + output_ucm_page($fh,$a,$n,(($pre|($u &0x3F)) << 6)&0xFFFF); + } + elsif (length($out)) + { + if ($pre) + { + $u = $pre|($u &0x3f); + } + printf $fh "<U%04X> ",$u; + foreach my $c (split(//,$out)) + { + printf $fh "\\x%02X",ord($c); + } + printf $fh " |%d\n",($fb ? 1 : 0); + } + else + { + warn join(',',@{$t->{$b}},$a,$t); + } + } +} + +sub output_ucm +{ + my ($fh,$name,$a) = @_; + print $fh "CHARMAP\n"; + output_ucm_page($fh,$a,$a,0); + print $fh "END CHARMAP\n"; +} + diff --git a/ext/Encode/encengine.c b/ext/Encode/encengine.c new file mode 100644 index 0000000000..513ef9ac5b --- /dev/null +++ b/ext/Encode/encengine.c @@ -0,0 +1,164 @@ +/* +Data structures for encoding transformations. + +Perl works internally in either a native 'byte' encoding or +in UTF-8 encoded Unicode. We have no immediate need for a "wchar_t" +representation. When we do we can use utf8_to_uv(). + +Most character encodings are either simple byte mappings or +variable length multi-byte encodings. UTF-8 can be viewed as a +rather extreme case of the latter. + +So to solve an important part of perl's encode needs we need to solve the +"multi-byte -> multi-byte" case. The simple byte forms are then just degenerate +case. (Where one of multi-bytes will usually be UTF-8.) + +The other type of encoding is a shift encoding where a prefix sequence +determines what subsequent bytes mean. Such encodings have state. + +We also need to handle case where a character in one encoding has to be +represented as multiple characters in the other. e.g. letter+diacritic. + +The process can be considered as pseudo perl: + +my $dst = ''; +while (length($src)) + { + my $size = $count($src); + my $in_seq = substr($src,0,$size,''); + my $out_seq = $s2d_hash{$in_seq}; + if (defined $out_seq) + { + $dst .= $out_seq; + } + else + { + # an error condition + } + } +return $dst; + +That has the following components: + &src_count - a "rule" for how many bytes make up the next character in the + source. + %s2d_hash - a mapping from input sequences to output sequences + +The problem with that scheme is that it does not allow the output +character repertoire to affect the characters considered from the +input. + +So we use a "trie" representation which can also be considered +a state machine: + +my $dst = ''; +my $seq = \@s2d_seq; +my $next = \@s2d_next; +while (length($src)) + { + my $byte = $substr($src,0,1,''); + my $out_seq = $seq->[$byte]; + if (defined $out_seq) + { + $dst .= $out_seq; + } + else + { + # an error condition + } + ($next,$seq) = @$next->[$byte] if $next; + } +return $dst; + +There is now a pair of data structures to represent everything. +It is valid for output sequence at a particular point to +be defined but zero length, that just means "don't know yet". +For the single byte case there is no 'next' so new tables will be the same as +the original tables. For a multi-byte case a prefix byte will flip to the tables +for the next page (adding nothing to the output), then the tables for the page +will provide the actual output and set tables back to original base page. + +This scheme can also handle shift encodings. + +A slight enhancement to the scheme also allows for look-ahead - if +we add a flag to re-add the removed byte to the source we could handle + a" -> ä + ab -> a (and take b back please) + +*/ + +#include <EXTERN.h> +#include <perl.h> +#define U8 U8 +#include "encode.h" + +int +do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, U8 *dst, STRLEN dlen, STRLEN *dout, int approx) +{ + const U8 *s = src; + const U8 *send = s+*slen; + const U8 *last = s; + U8 *d = dst; + U8 *dend = d+dlen; + int code = 0; + while (s < send) + { + encpage_t *e = enc; + U8 byte = *s; + while (byte > e->max) + e++; + if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) + { + const U8 *cend = s + (e->slen & 0x7f); + if (cend <= send) + { + STRLEN n; + if ((n = e->dlen)) + { + const U8 *out = e->seq+n*(byte - e->min); + U8 *oend = d+n; + if (dst) + { + if (oend <= dend) + { + while (d < oend) + *d++ = *out++; + } + else + { + /* Out of space */ + code = ENCODE_NOSPACE; + break; + } + } + else + d = oend; + } + enc = e->next; + s++; + if (s == cend) + { + if (approx && (e->slen & 0x80)) + code = ENCODE_FALLBACK; + last = s; + } + } + else + { + /* partial source character */ + code = ENCODE_PARTIAL; + break; + } + } + else + { + /* Cannot represent */ + code = ENCODE_NOREP; + break; + } + } + *slen = last - src; + *dout = d - dst; + return code; +} + + diff --git a/ext/Encode/encode.h b/ext/Encode/encode.h new file mode 100644 index 0000000000..853ad041b4 --- /dev/null +++ b/ext/Encode/encode.h @@ -0,0 +1,40 @@ +#ifndef ENCODE_H +#define ENCODE_H +#ifndef U8 +typedef unsigned char U8; +#endif + +typedef struct encpage_s encpage_t; + +struct encpage_s +{ + const U8 *seq; + encpage_t *next; + U8 min; + U8 max; + U8 dlen; + U8 slen; +}; + +typedef struct encode_s encode_t; +struct encode_s +{ + const char *name; + encpage_t *t_utf8; + encpage_t *f_utf8; + const U8 *rep; + int replen; +}; + +#ifdef U8 +extern int do_encode(encpage_t *enc, const U8 *src, STRLEN *slen, + U8 *dst, STRLEN dlen, STRLEN *dout, int approx); + +extern void Encode_DefineEncoding(encode_t *enc); +#endif + +#define ENCODE_NOSPACE 1 +#define ENCODE_PARTIAL 2 +#define ENCODE_NOREP 3 +#define ENCODE_FALLBACK 4 +#endif diff --git a/ext/Errno/Errno_pm.PL b/ext/Errno/Errno_pm.PL index 0666b2af60..3e34b90bee 100644 --- a/ext/Errno/Errno_pm.PL +++ b/ext/Errno/Errno_pm.PL @@ -2,9 +2,7 @@ use ExtUtils::MakeMaker; use Config; use strict; -use vars qw($VERSION); - -$VERSION = "1.111"; +our $VERSION = "1.111"; my %err = (); @@ -29,6 +27,12 @@ sub process_file { warn "Cannot open '$file'"; return; } + } elsif ($Config{gccversion} ne '') { + # With the -dM option, gcc outputs every #define it finds + unless(open(FH,"$Config{cc} -E -dM $file |")) { + warn "Cannot open '$file'"; + return; + } } else { unless(open(FH,"< $file")) { # This file could be a temporary file created by cppstdin @@ -79,6 +83,10 @@ sub get_files { } elsif ($^O eq 'vmesa') { # OS/390 C compiler doesn't generate #file or #line directives $file{'../../vmesa/errno.h'} = 1; + } elsif ($^O eq 'linux') { + # Some Linuxes have weird errno.hs which generate + # no #file or #line directives + $file{'/usr/include/errno.h'} = 1; } else { open(CPPI,"> errno.c") or die "Cannot open errno.c"; @@ -175,7 +183,7 @@ sub write_errno_pm { # package Errno; -use vars qw(\@EXPORT_OK \%EXPORT_TAGS \@ISA \$VERSION \%errno \$AUTOLOAD); +our (\@EXPORT_OK,\%EXPORT_TAGS,\@ISA,\$VERSION,\%errno,\$AUTOLOAD); use Exporter (); use Config; use strict; diff --git a/ext/Fcntl/Fcntl.pm b/ext/Fcntl/Fcntl.pm index 92103a1eaf..c68dda1c2f 100644 --- a/ext/Fcntl/Fcntl.pm +++ b/ext/Fcntl/Fcntl.pm @@ -201,7 +201,7 @@ sub S_ISENFMT { ( $_[0] & _S_IFMT() ) == S_IFENFMT() } sub AUTOLOAD { (my $constname = $AUTOLOAD) =~ s/.*:://; - my $val = constant($constname, 0); + my $val = constant($constname); if ($! != 0) { if ($! =~ /Invalid/ || $!{EINVAL}) { $AutoLoader::AUTOLOAD = $AUTOLOAD; diff --git a/ext/Fcntl/Fcntl.xs b/ext/Fcntl/Fcntl.xs index b597e03c1a..21029b212c 100644 --- a/ext/Fcntl/Fcntl.xs +++ b/ext/Fcntl/Fcntl.xs @@ -40,13 +40,13 @@ not_here(char *s) return -1; } -static double -constant(char *name, int arg) +static IV +constant(char *name) { errno = 0; - switch (*name) { + switch (*(name++)) { case '_': - if (strEQ(name, "_S_IFMT")) /* Yes, on name _S_IFMT return S_IFMT. */ + if (strEQ(name, "S_IFMT")) /* Yes, on name _S_IFMT return S_IFMT. */ #ifdef S_IFMT return S_IFMT; #else @@ -54,218 +54,219 @@ constant(char *name, int arg) #endif break; case 'F': - if (strnEQ(name, "F_", 2)) { - if (strEQ(name, "F_ALLOCSP")) + if (*name == '_') { + name++; + if (strEQ(name, "ALLOCSP")) #ifdef F_ALLOCSP return F_ALLOCSP; #else goto not_there; #endif - if (strEQ(name, "F_ALLOCSP64")) + if (strEQ(name, "ALLOCSP64")) #ifdef F_ALLOCSP64 return F_ALLOCSP64; #else goto not_there; #endif - if (strEQ(name, "F_COMPAT")) + if (strEQ(name, "COMPAT")) #ifdef F_COMPAT return F_COMPAT; #else goto not_there; #endif - if (strEQ(name, "F_DUP2FD")) + if (strEQ(name, "DUP2FD")) #ifdef F_DUP2FD return F_DUP2FD; #else goto not_there; #endif - if (strEQ(name, "F_DUPFD")) + if (strEQ(name, "DUPFD")) #ifdef F_DUPFD return F_DUPFD; #else goto not_there; #endif - if (strEQ(name, "F_EXLCK")) + if (strEQ(name, "EXLCK")) #ifdef F_EXLCK return F_EXLCK; #else goto not_there; #endif - if (strEQ(name, "F_FREESP")) + if (strEQ(name, "FREESP")) #ifdef F_FREESP return F_FREESP; #else goto not_there; #endif - if (strEQ(name, "F_FREESP64")) + if (strEQ(name, "FREESP64")) #ifdef F_FREESP64 return F_FREESP64; #else goto not_there; #endif - if (strEQ(name, "F_FSYNC")) + if (strEQ(name, "FSYNC")) #ifdef F_FSYNC return F_FSYNC; #else goto not_there; #endif - if (strEQ(name, "F_FSYNC64")) + if (strEQ(name, "FSYNC64")) #ifdef F_FSYNC64 return F_FSYNC64; #else goto not_there; #endif - if (strEQ(name, "F_GETFD")) + if (strEQ(name, "GETFD")) #ifdef F_GETFD return F_GETFD; #else goto not_there; #endif - if (strEQ(name, "F_GETFL")) + if (strEQ(name, "GETFL")) #ifdef F_GETFL return F_GETFL; #else goto not_there; #endif - if (strEQ(name, "F_GETLK")) + if (strEQ(name, "GETLK")) #ifdef F_GETLK return F_GETLK; #else goto not_there; #endif - if (strEQ(name, "F_GETLK64")) + if (strEQ(name, "GETLK64")) #ifdef F_GETLK64 return F_GETLK64; #else goto not_there; #endif - if (strEQ(name, "F_GETOWN")) + if (strEQ(name, "GETOWN")) #ifdef F_GETOWN return F_GETOWN; #else goto not_there; #endif - if (strEQ(name, "F_NODNY")) + if (strEQ(name, "NODNY")) #ifdef F_NODNY return F_NODNY; #else goto not_there; #endif - if (strEQ(name, "F_POSIX")) + if (strEQ(name, "POSIX")) #ifdef F_POSIX return F_POSIX; #else goto not_there; #endif - if (strEQ(name, "F_RDACC")) + if (strEQ(name, "RDACC")) #ifdef F_RDACC return F_RDACC; #else goto not_there; #endif - if (strEQ(name, "F_RDDNY")) + if (strEQ(name, "RDDNY")) #ifdef F_RDDNY return F_RDDNY; #else goto not_there; #endif - if (strEQ(name, "F_RDLCK")) + if (strEQ(name, "RDLCK")) #ifdef F_RDLCK return F_RDLCK; #else goto not_there; #endif - if (strEQ(name, "F_RWACC")) + if (strEQ(name, "RWACC")) #ifdef F_RWACC return F_RWACC; #else goto not_there; #endif - if (strEQ(name, "F_RWDNY")) + if (strEQ(name, "RWDNY")) #ifdef F_RWDNY return F_RWDNY; #else goto not_there; #endif - if (strEQ(name, "F_SETFD")) + if (strEQ(name, "SETFD")) #ifdef F_SETFD return F_SETFD; #else goto not_there; #endif - if (strEQ(name, "F_SETFL")) + if (strEQ(name, "SETFL")) #ifdef F_SETFL return F_SETFL; #else goto not_there; #endif - if (strEQ(name, "F_SETLK")) + if (strEQ(name, "SETLK")) #ifdef F_SETLK return F_SETLK; #else goto not_there; #endif - if (strEQ(name, "F_SETLK64")) + if (strEQ(name, "SETLK64")) #ifdef F_SETLK64 return F_SETLK64; #else goto not_there; #endif - if (strEQ(name, "F_SETLKW")) + if (strEQ(name, "SETLKW")) #ifdef F_SETLKW return F_SETLKW; #else goto not_there; #endif - if (strEQ(name, "F_SETLKW64")) + if (strEQ(name, "SETLKW64")) #ifdef F_SETLKW64 return F_SETLKW64; #else goto not_there; #endif - if (strEQ(name, "F_SETOWN")) + if (strEQ(name, "SETOWN")) #ifdef F_SETOWN return F_SETOWN; #else goto not_there; #endif - if (strEQ(name, "F_SHARE")) + if (strEQ(name, "SHARE")) #ifdef F_SHARE return F_SHARE; #else goto not_there; #endif - if (strEQ(name, "F_SHLCK")) + if (strEQ(name, "SHLCK")) #ifdef F_SHLCK return F_SHLCK; #else goto not_there; #endif - if (strEQ(name, "F_UNLCK")) + if (strEQ(name, "UNLCK")) #ifdef F_UNLCK return F_UNLCK; #else goto not_there; #endif - if (strEQ(name, "F_UNSHARE")) + if (strEQ(name, "UNSHARE")) #ifdef F_UNSHARE return F_UNSHARE; #else goto not_there; #endif - if (strEQ(name, "F_WRACC")) + if (strEQ(name, "WRACC")) #ifdef F_WRACC return F_WRACC; #else goto not_there; #endif - if (strEQ(name, "F_WRDNY")) + if (strEQ(name, "WRDNY")) #ifdef F_WRDNY return F_WRDNY; #else goto not_there; #endif - if (strEQ(name, "F_WRLCK")) + if (strEQ(name, "WRLCK")) #ifdef F_WRLCK return F_WRLCK; #else @@ -274,79 +275,79 @@ constant(char *name, int arg) errno = EINVAL; return 0; } - if (strEQ(name, "FAPPEND")) + if (strEQ(name, "APPEND")) #ifdef FAPPEND return FAPPEND; #else goto not_there; #endif - if (strEQ(name, "FASYNC")) + if (strEQ(name, "ASYNC")) #ifdef FASYNC return FASYNC; #else goto not_there; #endif - if (strEQ(name, "FCREAT")) + if (strEQ(name, "CREAT")) #ifdef FCREAT return FCREAT; #else goto not_there; #endif - if (strEQ(name, "FD_CLOEXEC")) + if (strEQ(name, "D_CLOEXEC")) #ifdef FD_CLOEXEC return FD_CLOEXEC; #else goto not_there; #endif - if (strEQ(name, "FDEFER")) + if (strEQ(name, "DEFER")) #ifdef FDEFER return FDEFER; #else goto not_there; #endif - if (strEQ(name, "FDSYNC")) + if (strEQ(name, "DSYNC")) #ifdef FDSYNC return FDSYNC; #else goto not_there; #endif - if (strEQ(name, "FEXCL")) + if (strEQ(name, "EXCL")) #ifdef FEXCL return FEXCL; #else goto not_there; #endif - if (strEQ(name, "FLARGEFILE")) + if (strEQ(name, "LARGEFILE")) #ifdef FLARGEFILE return FLARGEFILE; #else goto not_there; #endif - if (strEQ(name, "FNDELAY")) + if (strEQ(name, "NDELAY")) #ifdef FNDELAY return FNDELAY; #else goto not_there; #endif - if (strEQ(name, "FNONBLOCK")) + if (strEQ(name, "NONBLOCK")) #ifdef FNONBLOCK return FNONBLOCK; #else goto not_there; #endif - if (strEQ(name, "FRSYNC")) + if (strEQ(name, "RSYNC")) #ifdef FRSYNC return FRSYNC; #else goto not_there; #endif - if (strEQ(name, "FSYNC")) + if (strEQ(name, "SYNC")) #ifdef FSYNC return FSYNC; #else goto not_there; #endif - if (strEQ(name, "FTRUNC")) + if (strEQ(name, "TRUNC")) #ifdef FTRUNC return FTRUNC; #else @@ -354,28 +355,29 @@ constant(char *name, int arg) #endif break; case 'L': - if (strnEQ(name, "LOCK_", 5)) { + if (strnEQ(name, "OCK_", 4)) { /* We support flock() on systems which don't have it, so always supply the constants. */ - if (strEQ(name, "LOCK_SH")) + name += 4; + if (strEQ(name, "SH")) #ifdef LOCK_SH return LOCK_SH; #else return 1; #endif - if (strEQ(name, "LOCK_EX")) + if (strEQ(name, "EX")) #ifdef LOCK_EX return LOCK_EX; #else return 2; #endif - if (strEQ(name, "LOCK_NB")) + if (strEQ(name, "NB")) #ifdef LOCK_NB return LOCK_NB; #else return 4; #endif - if (strEQ(name, "LOCK_UN")) + if (strEQ(name, "UN")) #ifdef LOCK_UN return LOCK_UN; #else @@ -385,188 +387,189 @@ constant(char *name, int arg) goto not_there; break; case 'O': - if (strnEQ(name, "O_", 2)) { - if (strEQ(name, "O_ACCMODE")) + if (name[0] == '_') { + name++; + if (strEQ(name, "ACCMODE")) #ifdef O_ACCMODE return O_ACCMODE; #else goto not_there; #endif - if (strEQ(name, "O_APPEND")) + if (strEQ(name, "APPEND")) #ifdef O_APPEND return O_APPEND; #else goto not_there; #endif - if (strEQ(name, "O_ASYNC")) + if (strEQ(name, "ASYNC")) #ifdef O_ASYNC return O_ASYNC; #else goto not_there; #endif - if (strEQ(name, "O_BINARY")) + if (strEQ(name, "BINARY")) #ifdef O_BINARY return O_BINARY; #else goto not_there; #endif - if (strEQ(name, "O_CREAT")) + if (strEQ(name, "CREAT")) #ifdef O_CREAT return O_CREAT; #else goto not_there; #endif - if (strEQ(name, "O_DEFER")) + if (strEQ(name, "DEFER")) #ifdef O_DEFER return O_DEFER; #else goto not_there; #endif - if (strEQ(name, "O_DIRECT")) + if (strEQ(name, "DIRECT")) #ifdef O_DIRECT return O_DIRECT; #else goto not_there; #endif - if (strEQ(name, "O_DIRECTORY")) + if (strEQ(name, "DIRECTORY")) #ifdef O_DIRECTORY return O_DIRECTORY; #else goto not_there; #endif - if (strEQ(name, "O_DSYNC")) + if (strEQ(name, "DSYNC")) #ifdef O_DSYNC return O_DSYNC; #else goto not_there; #endif - if (strEQ(name, "O_EXCL")) + if (strEQ(name, "EXCL")) #ifdef O_EXCL return O_EXCL; #else goto not_there; #endif - if (strEQ(name, "O_EXLOCK")) + if (strEQ(name, "EXLOCK")) #ifdef O_EXLOCK return O_EXLOCK; #else goto not_there; #endif - if (strEQ(name, "O_LARGEFILE")) + if (strEQ(name, "LARGEFILE")) #ifdef O_LARGEFILE return O_LARGEFILE; #else goto not_there; #endif - if (strEQ(name, "O_NDELAY")) + if (strEQ(name, "NDELAY")) #ifdef O_NDELAY return O_NDELAY; #else goto not_there; #endif - if (strEQ(name, "O_NOCTTY")) + if (strEQ(name, "NOCTTY")) #ifdef O_NOCTTY return O_NOCTTY; #else goto not_there; #endif - if (strEQ(name, "O_NOFOLLOW")) + if (strEQ(name, "NOFOLLOW")) #ifdef O_NOFOLLOW return O_NOFOLLOW; #else goto not_there; #endif - if (strEQ(name, "O_NOINHERIT")) + if (strEQ(name, "NOINHERIT")) #ifdef O_NOINHERIT return O_NOINHERIT; #else goto not_there; #endif - if (strEQ(name, "O_NONBLOCK")) + if (strEQ(name, "NONBLOCK")) #ifdef O_NONBLOCK return O_NONBLOCK; #else goto not_there; #endif - if (strEQ(name, "O_RANDOM")) + if (strEQ(name, "RANDOM")) #ifdef O_RANDOM return O_RANDOM; #else goto not_there; #endif - if (strEQ(name, "O_RAW")) + if (strEQ(name, "RAW")) #ifdef O_RAW return O_RAW; #else goto not_there; #endif - if (strEQ(name, "O_RDONLY")) + if (strEQ(name, "RDONLY")) #ifdef O_RDONLY return O_RDONLY; #else goto not_there; #endif - if (strEQ(name, "O_RDWR")) + if (strEQ(name, "RDWR")) #ifdef O_RDWR return O_RDWR; #else goto not_there; #endif - if (strEQ(name, "O_RSYNC")) + if (strEQ(name, "RSYNC")) #ifdef O_RSYNC return O_RSYNC; #else goto not_there; #endif - if (strEQ(name, "O_SEQUENTIAL")) + if (strEQ(name, "SEQUENTIAL")) #ifdef O_SEQUENTIAL return O_SEQUENTIAL; #else goto not_there; #endif - if (strEQ(name, "O_SHLOCK")) + if (strEQ(name, "SHLOCK")) #ifdef O_SHLOCK return O_SHLOCK; #else goto not_there; #endif - if (strEQ(name, "O_SYNC")) + if (strEQ(name, "SYNC")) #ifdef O_SYNC return O_SYNC; #else goto not_there; #endif - if (strEQ(name, "O_TEMPORARY")) + if (strEQ(name, "TEMPORARY")) #ifdef O_TEMPORARY return O_TEMPORARY; #else goto not_there; #endif - if (strEQ(name, "O_TEXT")) + if (strEQ(name, "TEXT")) #ifdef O_TEXT return O_TEXT; #else goto not_there; #endif - if (strEQ(name, "O_TRUNC")) + if (strEQ(name, "TRUNC")) #ifdef O_TRUNC return O_TRUNC; #else goto not_there; #endif - if (strEQ(name, "O_WRONLY")) + if (strEQ(name, "WRONLY")) #ifdef O_WRONLY return O_WRONLY; #else goto not_there; #endif - if (strEQ(name, "O_ALIAS")) + if (strEQ(name, "ALIAS")) #ifdef O_ALIAS return O_ALIAS; #else goto not_there; #endif - if (strEQ(name, "O_RSRC")) + if (strEQ(name, "RSRC")) #ifdef O_RSRC return O_RSRC; #else @@ -576,171 +579,171 @@ constant(char *name, int arg) goto not_there; break; case 'S': - switch (name[1]) { + switch (*(name++)) { case '_': - if (strEQ(name, "S_ISUID")) + if (strEQ(name, "ISUID")) #ifdef S_ISUID return S_ISUID; #else goto not_there; #endif - if (strEQ(name, "S_ISGID")) + if (strEQ(name, "ISGID")) #ifdef S_ISGID return S_ISGID; #else goto not_there; #endif - if (strEQ(name, "S_ISVTX")) + if (strEQ(name, "ISVTX")) #ifdef S_ISVTX return S_ISVTX; #else goto not_there; #endif - if (strEQ(name, "S_ISTXT")) + if (strEQ(name, "ISTXT")) #ifdef S_ISTXT return S_ISTXT; #else goto not_there; #endif - if (strEQ(name, "S_IFREG")) + if (strEQ(name, "IFREG")) #ifdef S_IFREG return S_IFREG; #else goto not_there; #endif - if (strEQ(name, "S_IFDIR")) + if (strEQ(name, "IFDIR")) #ifdef S_IFDIR return S_IFDIR; #else goto not_there; #endif - if (strEQ(name, "S_IFLNK")) + if (strEQ(name, "IFLNK")) #ifdef S_IFLNK return S_IFLNK; #else goto not_there; #endif - if (strEQ(name, "S_IFSOCK")) + if (strEQ(name, "IFSOCK")) #ifdef S_IFSOCK return S_IFSOCK; #else goto not_there; #endif - if (strEQ(name, "S_IFBLK")) + if (strEQ(name, "IFBLK")) #ifdef S_IFBLK return S_IFBLK; #else goto not_there; #endif - if (strEQ(name, "S_IFCHR")) + if (strEQ(name, "IFCHR")) #ifdef S_IFCHR return S_IFCHR; #else goto not_there; #endif - if (strEQ(name, "S_IFIFO")) + if (strEQ(name, "IFIFO")) #ifdef S_IFIFO return S_IFIFO; #else goto not_there; #endif - if (strEQ(name, "S_IFWHT")) + if (strEQ(name, "IFWHT")) #ifdef S_IFWHT return S_IFWHT; #else goto not_there; #endif - if (strEQ(name, "S_ENFMT")) + if (strEQ(name, "ENFMT")) #ifdef S_ENFMT return S_ENFMT; #else goto not_there; #endif - if (strEQ(name, "S_IRUSR")) + if (strEQ(name, "IRUSR")) #ifdef S_IRUSR return S_IRUSR; #else goto not_there; #endif - if (strEQ(name, "S_IWUSR")) + if (strEQ(name, "IWUSR")) #ifdef S_IWUSR return S_IWUSR; #else goto not_there; #endif - if (strEQ(name, "S_IXUSR")) + if (strEQ(name, "IXUSR")) #ifdef S_IXUSR return S_IXUSR; #else goto not_there; #endif - if (strEQ(name, "S_IRWXU")) + if (strEQ(name, "IRWXU")) #ifdef S_IRWXU return S_IRWXU; #else goto not_there; #endif - if (strEQ(name, "S_IRGRP")) + if (strEQ(name, "IRGRP")) #ifdef S_IRGRP return S_IRGRP; #else goto not_there; #endif - if (strEQ(name, "S_IWGRP")) + if (strEQ(name, "IWGRP")) #ifdef S_IWGRP return S_IWGRP; #else goto not_there; #endif - if (strEQ(name, "S_IXGRP")) + if (strEQ(name, "IXGRP")) #ifdef S_IXGRP return S_IXGRP; #else goto not_there; #endif - if (strEQ(name, "S_IRWXG")) + if (strEQ(name, "IRWXG")) #ifdef S_IRWXG return S_IRWXG; #else goto not_there; #endif - if (strEQ(name, "S_IROTH")) + if (strEQ(name, "IROTH")) #ifdef S_IROTH return S_IROTH; #else goto not_there; #endif - if (strEQ(name, "S_IWOTH")) + if (strEQ(name, "IWOTH")) #ifdef S_IWOTH return S_IWOTH; #else goto not_there; #endif - if (strEQ(name, "S_IXOTH")) + if (strEQ(name, "IXOTH")) #ifdef S_IXOTH return S_IXOTH; #else goto not_there; #endif - if (strEQ(name, "S_IRWXO")) + if (strEQ(name, "IRWXO")) #ifdef S_IRWXO return S_IRWXO; #else goto not_there; #endif - if (strEQ(name, "S_IREAD")) + if (strEQ(name, "IREAD")) #ifdef S_IREAD return S_IREAD; #else goto not_there; #endif - if (strEQ(name, "S_IWRITE")) + if (strEQ(name, "IWRITE")) #ifdef S_IWRITE return S_IWRITE; #else goto not_there; #endif - if (strEQ(name, "S_IEXEC")) + if (strEQ(name, "IEXEC")) #ifdef S_IEXEC return S_IEXEC; #else @@ -748,19 +751,19 @@ constant(char *name, int arg) #endif break; case 'E': - if (strEQ(name, "SEEK_CUR")) + if (strEQ(name, "EK_CUR")) #ifdef SEEK_CUR return SEEK_CUR; #else return 1; #endif - if (strEQ(name, "SEEK_END")) + if (strEQ(name, "EK_END")) #ifdef SEEK_END return SEEK_END; #else return 2; #endif - if (strEQ(name, "SEEK_SET")) + if (strEQ(name, "EK_SET")) #ifdef SEEK_SET return SEEK_SET; #else @@ -780,8 +783,7 @@ not_there: MODULE = Fcntl PACKAGE = Fcntl -double -constant(name,arg) +IV +constant(name) char * name - int arg diff --git a/ext/Filter/Util/Call/Call.pm b/ext/Filter/Util/Call/Call.pm new file mode 100644 index 0000000000..694b1b3b20 --- /dev/null +++ b/ext/Filter/Util/Call/Call.pm @@ -0,0 +1,474 @@ +package Filter::Util::Call ; + +require 5.002 ; +require DynaLoader; +require Exporter; +use Carp ; +use strict; +use vars qw($VERSION @ISA @EXPORT) ; + +@ISA = qw(Exporter DynaLoader); +@EXPORT = qw( filter_add filter_del filter_read filter_read_exact) ; +$VERSION = "1.04" ; + +sub filter_read_exact($) +{ + my ($size) = @_ ; + my ($left) = $size ; + my ($status) ; + + croak ("filter_read_exact: size parameter must be > 0") + unless $size > 0 ; + + # try to read a block which is exactly $size bytes long + while ($left and ($status = filter_read($left)) > 0) { + $left = $size - length $_ ; + } + + # EOF with pending data is a special case + return 1 if $status == 0 and length $_ ; + + return $status ; +} + +sub filter_add($) +{ + my($obj) = @_ ; + + # Did we get a code reference? + my $coderef = (ref $obj eq 'CODE') ; + + # If the parameter isn't already a reference, make it one. + $obj = \$obj unless ref $obj ; + + $obj = bless ($obj, (caller)[0]) unless $coderef ; + + # finish off the installation of the filter in C. + Filter::Util::Call::real_import($obj, (caller)[0], $coderef) ; +} + +bootstrap Filter::Util::Call ; + +1; +__END__ + +=head1 NAME + +Filter::Util::Call - Perl Source Filter Utility Module + +=head1 SYNOPSIS + + use Filter::Util::Call ; + +=head1 DESCRIPTION + +This module provides you with the framework to write I<Source Filters> +in Perl. + +A I<Perl Source Filter> is implemented as a Perl module. The structure +of the module can take one of two broadly similar formats. To +distinguish between them, the first will be referred to as I<method +filter> and the second as I<closure filter>. + +Here is a skeleton for the I<method filter>: + + package MyFilter ; + + use Filter::Util::Call ; + + sub import + { + my($type, @arguments) = @_ ; + filter_add([]) ; + } + + sub filter + { + my($self) = @_ ; + my($status) ; + + $status = filter_read() ; + $status ; + } + + 1 ; + +and this is the equivalent skeleton for the I<closure filter>: + + package MyFilter ; + + use Filter::Util::Call ; + + sub import + { + my($type, @arguments) = @_ ; + + filter_add( + sub + { + my($status) ; + $status = filter_read() ; + $status ; + } ) + } + + 1 ; + +To make use of either of the two filter modules above, place the line +below in a Perl source file. + + use MyFilter; + +In fact, the skeleton modules shown above are fully functional I<Source +Filters>, albeit fairly useless ones. All they does is filter the +source stream without modifying it at all. + +As you can see both modules have a broadly similar structure. They both +make use of the C<Filter::Util::Call> module and both have an C<import> +method. The difference between them is that the I<method filter> +requires a I<filter> method, whereas the I<closure filter> gets the +equivalent of a I<filter> method with the anonymous sub passed to +I<filter_add>. + +To make proper use of the I<closure filter> shown above you need to +have a good understanding of the concept of a I<closure>. See +L<perlref> for more details on the mechanics of I<closures>. + +=head2 B<use Filter::Util::Call> + +The following functions are exported by C<Filter::Util::Call>: + + filter_add() + filter_read() + filter_read_exact() + filter_del() + +=head2 B<import()> + +The C<import> method is used to create an instance of the filter. It is +called indirectly by Perl when it encounters the C<use MyFilter> line +in a source file (See L<perlfunc/import> for more details on +C<import>). + +It will always have at least one parameter automatically passed by Perl +- this corresponds to the name of the package. In the example above it +will be C<"MyFilter">. + +Apart from the first parameter, import can accept an optional list of +parameters. These can be used to pass parameters to the filter. For +example: + + use MyFilter qw(a b c) ; + +will result in the C<@_> array having the following values: + + @_ [0] => "MyFilter" + @_ [1] => "a" + @_ [2] => "b" + @_ [3] => "c" + +Before terminating, the C<import> function must explicitly install the +filter by calling C<filter_add>. + +B<filter_add()> + +The function, C<filter_add>, actually installs the filter. It takes one +parameter which should be a reference. The kind of reference used will +dictate which of the two filter types will be used. + +If a CODE reference is used then a I<closure filter> will be assumed. + +If a CODE reference is not used, a I<method filter> will be assumed. +In a I<method filter>, the reference can be used to store context +information. The reference will be I<blessed> into the package by +C<filter_add>. + +See the filters at the end of this documents for examples of using +context information using both I<method filters> and I<closure +filters>. + +=head2 B<filter() and anonymous sub> + +Both the C<filter> method used with a I<method filter> and the +anonymous sub used with a I<closure filter> is where the main +processing for the filter is done. + +The big difference between the two types of filter is that the I<method +filter> uses the object passed to the method to store any context data, +whereas the I<closure filter> uses the lexical variables that are +maintained by the closure. + +Note that the single parameter passed to the I<method filter>, +C<$self>, is the same reference that was passed to C<filter_add> +blessed into the filter's package. See the example filters later on for +details of using C<$self>. + +Here is a list of the common features of the anonymous sub and the +C<filter()> method. + +=over 5 + +=item B<$_> + +Although C<$_> doesn't actually appear explicitly in the sample filters +above, it is implicitly used in a number of places. + +Firstly, when either C<filter> or the anonymous sub are called, a local +copy of C<$_> will automatically be created. It will always contain the +empty string at this point. + +Next, both C<filter_read> and C<filter_read_exact> will append any +source data that is read to the end of C<$_>. + +Finally, when C<filter> or the anonymous sub are finished processing, +they are expected to return the filtered source using C<$_>. + +This implicit use of C<$_> greatly simplifies the filter. + +=item B<$status> + +The status value that is returned by the user's C<filter> method or +anonymous sub and the C<filter_read> and C<read_exact> functions take +the same set of values, namely: + + < 0 Error + = 0 EOF + > 0 OK + +=item B<filter_read> and B<filter_read_exact> + +These functions are used by the filter to obtain either a line or block +from the next filter in the chain or the actual source file if there +aren't any other filters. + +The function C<filter_read> takes two forms: + + $status = filter_read() ; + $status = filter_read($size) ; + +The first form is used to request a I<line>, the second requests a +I<block>. + +In line mode, C<filter_read> will append the next source line to the +end of the C<$_> scalar. + +In block mode, C<filter_read> will append a block of data which is <= +C<$size> to the end of the C<$_> scalar. It is important to emphasise +the that C<filter_read> will not necessarily read a block which is +I<precisely> C<$size> bytes. + +If you need to be able to read a block which has an exact size, you can +use the function C<filter_read_exact>. It works identically to +C<filter_read> in block mode, except it will try to read a block which +is exactly C<$size> bytes in length. The only circumstances when it +will not return a block which is C<$size> bytes long is on EOF or +error. + +It is I<very> important to check the value of C<$status> after I<every> +call to C<filter_read> or C<filter_read_exact>. + +=item B<filter_del> + +The function, C<filter_del>, is used to disable the current filter. It +does not affect the running of the filter. All it does is tell Perl not +to call filter any more. + +See L<Example 4: Using filter_del> for details. + +=back + +=head1 EXAMPLES + +Here are a few examples which illustrate the key concepts - as such +most of them are of little practical use. + +The C<examples> sub-directory has copies of all these filters +implemented both as I<method filters> and as I<closure filters>. + +=head2 Example 1: A simple filter. + +Below is a I<method filter> which is hard-wired to replace all +occurrences of the string C<"Joe"> to C<"Jim">. Not particularly +Useful, but it is the first example and I wanted to keep it simple. + + package Joe2Jim ; + + use Filter::Util::Call ; + + sub import + { + my($type) = @_ ; + + filter_add(bless []) ; + } + + sub filter + { + my($self) = @_ ; + my($status) ; + + s/Joe/Jim/g + if ($status = filter_read()) > 0 ; + $status ; + } + + 1 ; + +Here is an example of using the filter: + + use Joe2Jim ; + print "Where is Joe?\n" ; + +And this is what the script above will print: + + Where is Jim? + +=head2 Example 2: Using the context + +The previous example was not particularly useful. To make it more +general purpose we will make use of the context data and allow any +arbitrary I<from> and I<to> strings to be used. This time we will use a +I<closure filter>. To reflect its enhanced role, the filter is called +C<Subst>. + + package Subst ; + + use Filter::Util::Call ; + use Carp ; + + sub import + { + croak("usage: use Subst qw(from to)") + unless @_ == 3 ; + my ($self, $from, $to) = @_ ; + filter_add( + sub + { + my ($status) ; + s/$from/$to/ + if ($status = filter_read()) > 0 ; + $status ; + }) + } + 1 ; + +and is used like this: + + use Subst qw(Joe Jim) ; + print "Where is Joe?\n" ; + + +=head2 Example 3: Using the context within the filter + +Here is a filter which a variation of the C<Joe2Jim> filter. As well as +substituting all occurrences of C<"Joe"> to C<"Jim"> it keeps a count +of the number of substitutions made in the context object. + +Once EOF is detected (C<$status> is zero) the filter will insert an +extra line into the source stream. When this extra line is executed it +will print a count of the number of substitutions actually made. +Note that C<$status> is set to C<1> in this case. + + package Count ; + + use Filter::Util::Call ; + + sub filter + { + my ($self) = @_ ; + my ($status) ; + + if (($status = filter_read()) > 0 ) { + s/Joe/Jim/g ; + ++ $$self ; + } + elsif ($$self >= 0) { # EOF + $_ = "print q[Made ${$self} substitutions\n]" ; + $status = 1 ; + $$self = -1 ; + } + + $status ; + } + + sub import + { + my ($self) = @_ ; + my ($count) = 0 ; + filter_add(\$count) ; + } + + 1 ; + +Here is a script which uses it: + + use Count ; + print "Hello Joe\n" ; + print "Where is Joe\n" ; + +Outputs: + + Hello Jim + Where is Jim + Made 2 substitutions + +=head2 Example 4: Using filter_del + +Another variation on a theme. This time we will modify the C<Subst> +filter to allow a starting and stopping pattern to be specified as well +as the I<from> and I<to> patterns. If you know the I<vi> editor, it is +the equivalent of this command: + + :/start/,/stop/s/from/to/ + +When used as a filter we want to invoke it like this: + + use NewSubst qw(start stop from to) ; + +Here is the module. + + package NewSubst ; + + use Filter::Util::Call ; + use Carp ; + + sub import + { + my ($self, $start, $stop, $from, $to) = @_ ; + my ($found) = 0 ; + croak("usage: use Subst qw(start stop from to)") + unless @_ == 5 ; + + filter_add( + sub + { + my ($status) ; + + if (($status = filter_read()) > 0) { + + $found = 1 + if $found == 0 and /$start/ ; + + if ($found) { + s/$from/$to/ ; + filter_del() if /$stop/ ; + } + + } + $status ; + } ) + + } + + 1 ; + +=head1 AUTHOR + +Paul Marquess + +=head1 DATE + +26th January 1996 + +=cut + diff --git a/ext/Filter/Util/Call/Call.xs b/ext/Filter/Util/Call/Call.xs new file mode 100644 index 0000000000..c8105d0d43 --- /dev/null +++ b/ext/Filter/Util/Call/Call.xs @@ -0,0 +1,252 @@ +/* + * Filename : Call.xs + * + * Author : Paul Marquess + * Date : 26th March 2000 + * Version : 1.05 + * + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#ifndef PERL_VERSION +# include "patchlevel.h" +# define PERL_REVISION 5 +# define PERL_VERSION PATCHLEVEL +# define PERL_SUBVERSION SUBVERSION +#endif + +/* defgv must be accessed differently under threaded perl */ +/* DEFSV et al are in 5.004_56 */ +#ifndef DEFSV +# define DEFSV GvSV(defgv) +#endif + +#ifndef pTHX +# define pTHX +# define pTHX_ +# define aTHX +# define aTHX_ +#endif + + +/* Internal defines */ +#define PERL_MODULE(s) IoBOTTOM_NAME(s) +#define PERL_OBJECT(s) IoTOP_GV(s) +#define FILTER_ACTIVE(s) IoLINES(s) +#define BUF_OFFSET(sv) IoPAGE_LEN(sv) +#define CODE_REF(sv) IoPAGE(sv) + +#define SET_LEN(sv,len) \ + do { SvPVX(sv)[len] = '\0'; SvCUR_set(sv, len); } while (0) + + + +static int fdebug = 0; +static int current_idx ; + +static I32 +filter_call(pTHX_ int idx, SV *buf_sv, int maxlen) +{ + SV *my_sv = FILTER_DATA(idx); + char *nl = "\n"; + char *p; + char *out_ptr; + int n; + + if (fdebug) + warn("**** In filter_call - maxlen = %d, out len buf = %d idx = %d my_sv = %d [%s]\n", + maxlen, SvCUR(buf_sv), idx, SvCUR(my_sv), SvPVX(my_sv) ) ; + + while (1) { + + /* anything left from last time */ + if (n = SvCUR(my_sv)) { + + out_ptr = SvPVX(my_sv) + BUF_OFFSET(my_sv) ; + + if (maxlen) { + /* want a block */ + if (fdebug) + warn("BLOCK(%d): size = %d, maxlen = %d\n", + idx, n, maxlen) ; + + sv_catpvn(buf_sv, out_ptr, maxlen > n ? n : maxlen ); + if(n <= maxlen) { + BUF_OFFSET(my_sv) = 0 ; + SET_LEN(my_sv, 0) ; + } + else { + BUF_OFFSET(my_sv) += maxlen ; + SvCUR_set(my_sv, n - maxlen) ; + } + return SvCUR(buf_sv); + } + else { + /* want lines */ + if (p = ninstr(out_ptr, out_ptr + n - 1, nl, nl)) { + + sv_catpvn(buf_sv, out_ptr, p - out_ptr + 1); + + n = n - (p - out_ptr + 1); + BUF_OFFSET(my_sv) += (p - out_ptr + 1); + SvCUR_set(my_sv, n) ; + if (fdebug) + warn("recycle %d - leaving %d, returning %d [%s]", + idx, n, SvCUR(buf_sv), SvPVX(buf_sv)) ; + + return SvCUR(buf_sv); + } + else /* no EOL, so append the complete buffer */ + sv_catpvn(buf_sv, out_ptr, n) ; + } + + } + + + SET_LEN(my_sv, 0) ; + BUF_OFFSET(my_sv) = 0 ; + + if (FILTER_ACTIVE(my_sv)) + { + dSP ; + int count ; + + if (fdebug) + warn("gonna call %s::filter\n", PERL_MODULE(my_sv)) ; + + ENTER ; + SAVETMPS; + + SAVEINT(current_idx) ; /* save current idx */ + current_idx = idx ; + + SAVESPTR(DEFSV) ; /* save $_ */ + /* make $_ use our buffer */ + DEFSV = sv_2mortal(newSVpv("", 0)) ; + + PUSHMARK(sp) ; + + if (CODE_REF(my_sv)) { + /* if (SvROK(PERL_OBJECT(my_sv)) && SvTYPE(SvRV(PERL_OBJECT(my_sv))) == SVt_PVCV) { */ + count = perl_call_sv((SV*)PERL_OBJECT(my_sv), G_SCALAR); + } + else { + XPUSHs((SV*)PERL_OBJECT(my_sv)) ; + + PUTBACK ; + + count = perl_call_method("filter", G_SCALAR); + } + + SPAGAIN ; + + if (count != 1) + croak("Filter::Util::Call - %s::filter returned %d values, 1 was expected \n", + PERL_MODULE(my_sv), count ) ; + + n = POPi ; + + if (fdebug) + warn("status = %d, length op buf = %d [%s]\n", + n, SvCUR(DEFSV), SvPVX(DEFSV) ) ; + if (SvCUR(DEFSV)) + sv_setpvn(my_sv, SvPVX(DEFSV), SvCUR(DEFSV)) ; + + PUTBACK ; + FREETMPS ; + LEAVE ; + } + else + n = FILTER_READ(idx + 1, my_sv, maxlen) ; + + if (n <= 0) + { + /* Either EOF or an error */ + + if (fdebug) + warn ("filter_read %d returned %d , returning %d\n", idx, n, + (SvCUR(buf_sv)>0) ? SvCUR(buf_sv) : n); + + /* PERL_MODULE(my_sv) ; */ + /* PERL_OBJECT(my_sv) ; */ + filter_del(filter_call); + + /* If error, return the code */ + if (n < 0) + return n ; + + /* return what we have so far else signal eof */ + return (SvCUR(buf_sv)>0) ? SvCUR(buf_sv) : n; + } + + } +} + + + +MODULE = Filter::Util::Call PACKAGE = Filter::Util::Call + +REQUIRE: 1.924 +PROTOTYPES: ENABLE + +#define IDX current_idx + +int +filter_read(size=0) + int size + CODE: + { + SV * buffer = DEFSV ; + + RETVAL = FILTER_READ(IDX + 1, buffer, size) ; + } + OUTPUT: + RETVAL + + + + +void +real_import(object, perlmodule, coderef) + SV * object + char * perlmodule + int coderef + PPCODE: + { + SV * sv = newSV(1) ; + + (void)SvPOK_only(sv) ; + filter_add(filter_call, sv) ; + + PERL_MODULE(sv) = savepv(perlmodule) ; + PERL_OBJECT(sv) = (GV*) newSVsv(object) ; + FILTER_ACTIVE(sv) = TRUE ; + BUF_OFFSET(sv) = 0 ; + CODE_REF(sv) = coderef ; + + SvCUR_set(sv, 0) ; + + } + +void +filter_del() + CODE: + FILTER_ACTIVE(FILTER_DATA(IDX)) = FALSE ; + + + +void +unimport(...) + PPCODE: + filter_del(filter_call); + + +BOOT: + /* temporary hack to control debugging in toke.c */ + if (fdebug) + filter_add(NULL, (fdebug) ? (SV*)"1" : (SV*)"0"); + + diff --git a/ext/Filter/Util/Call/Makefile.PL b/ext/Filter/Util/Call/Makefile.PL new file mode 100644 index 0000000000..030dbc2d31 --- /dev/null +++ b/ext/Filter/Util/Call/Makefile.PL @@ -0,0 +1,7 @@ +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => 'Filter::Util::Call', + VERSION_FROM => 'Call.pm', + MAN3PODS => {}, # Pods will be built by installman. +); diff --git a/ext/GDBM_File/GDBM_File.pm b/ext/GDBM_File/GDBM_File.pm index ab866eecab..fe87dd0856 100644 --- a/ext/GDBM_File/GDBM_File.pm +++ b/ext/GDBM_File/GDBM_File.pm @@ -59,7 +59,7 @@ use XSLoader (); GDBM_WRITER ); -$VERSION = "1.03"; +$VERSION = "1.04"; sub AUTOLOAD { my($constname); diff --git a/ext/GDBM_File/GDBM_File.xs b/ext/GDBM_File/GDBM_File.xs index 13123ef540..b4d3b3d29c 100644 --- a/ext/GDBM_File/GDBM_File.xs +++ b/ext/GDBM_File/GDBM_File.xs @@ -56,7 +56,7 @@ not_here(char *s) static void output_datum(pTHX_ SV *arg, char *str, int size) { -#if !defined(MYMALLOC) || (defined(MYMALLOC) && defined(PERL_POLLUTE_MALLOC) && !defined(LEAKTEST)) +#if (!defined(MYMALLOC) || (defined(MYMALLOC) && defined(PERL_POLLUTE_MALLOC))) && !defined(LEAKTEST) sv_usepvn(arg, str, size); #else sv_setpvn(arg, str, size); diff --git a/ext/GDBM_File/typemap b/ext/GDBM_File/typemap index 4f79ae3e32..1dd063003a 100644 --- a/ext/GDBM_File/typemap +++ b/ext/GDBM_File/typemap @@ -19,8 +19,14 @@ T_DATUM_K $var.dsize = (int)PL_na; T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); - $var.dptr = SvPV($arg, PL_na); - $var.dsize = (int)PL_na; + if (SvOK($arg)) { + $var.dptr = SvPV($arg, PL_na); + $var.dsize = (int)PL_na; + } + else { + $var.dptr = \"\"; + $var.dsize = 0; + } OUTPUT T_DATUM_K output_datum(aTHX_ $arg, $var.dptr, $var.dsize); diff --git a/ext/IO/IO.xs b/ext/IO/IO.xs index 1b79cfd4c0..13b198cc71 100644 --- a/ext/IO/IO.xs +++ b/ext/IO/IO.xs @@ -59,9 +59,9 @@ io_blocking(InputStream f, int block) if (RETVAL >= 0) { int mode = RETVAL; #ifdef O_NONBLOCK - /* POSIX style */ + /* POSIX style */ #if defined(O_NDELAY) && O_NDELAY != O_NONBLOCK - /* Ooops has O_NDELAY too - make sure we don't + /* Ooops has O_NDELAY too - make sure we don't * get SysV behaviour by mistake. */ /* E.g. In UNICOS and UNICOS/mk a F_GETFL returns an O_NDELAY @@ -86,7 +86,7 @@ io_blocking(InputStream f, int block) } } #else - /* Standard POSIX */ + /* Standard POSIX */ RETVAL = RETVAL & O_NONBLOCK ? 0 : 1; if ((block == 0) && !(mode & O_NONBLOCK)) { @@ -103,11 +103,11 @@ io_blocking(InputStream f, int block) if(ret < 0) RETVAL = ret; } -#endif +#endif #else /* Not POSIX - better have O_NDELAY or we can't cope. * for BSD-ish machines this is an acceptable alternative - * for SysV we can't tell "would block" from EOF but that is + * for SysV we can't tell "would block" from EOF but that is * the way SysV is... */ RETVAL = RETVAL & O_NDELAY ? 0 : 1; @@ -141,13 +141,18 @@ fgetpos(handle) InputStream handle CODE: if (handle) { - Fpos_t pos; #ifdef PerlIO - PerlIO_getpos(handle, &pos); + ST(0) = sv_2mortal(newSV(0)); + if (PerlIO_getpos(handle, ST(0)) != 0) { + ST(0) = &PL_sv_undef; + } #else - fgetpos(handle, &pos); + if (fgetpos(handle, &pos)) { + ST(0) = &PL_sv_undef; + } else { + ST(0) = sv_2mortal(newSVpv((char*)&pos, sizeof(Fpos_t))); + } #endif - ST(0) = sv_2mortal(newSVpv((char*)&pos, sizeof(Fpos_t))); } else { ST(0) = &PL_sv_undef; @@ -159,14 +164,21 @@ fsetpos(handle, pos) InputStream handle SV * pos CODE: - char *p; - STRLEN len; - if (handle && (p = SvPV(pos,len)) && len == sizeof(Fpos_t)) + if (handle) { #ifdef PerlIO - RETVAL = PerlIO_setpos(handle, (Fpos_t*)p); + RETVAL = PerlIO_setpos(handle, pos); #else - RETVAL = fsetpos(handle, (Fpos_t*)p); + char *p; + STRLEN len; + if ((p = SvPV(pos,len)) && len == sizeof(Fpos_t)) { + RETVAL = fsetpos(handle, (Fpos_t*)p); + } + else { + RETVAL = -1; + errno = EINVAL; + } #endif + } else { RETVAL = -1; errno = EINVAL; @@ -202,7 +214,7 @@ new_tmpfile(packname = "IO::File") MODULE = IO PACKAGE = IO::Poll -void +void _poll(timeout,...) int timeout; PPCODE: diff --git a/ext/IO/lib/IO/Handle.pm b/ext/IO/lib/IO/Handle.pm index b6cb410b57..fb754a60bf 100644 --- a/ext/IO/lib/IO/Handle.pm +++ b/ext/IO/lib/IO/Handle.pm @@ -110,7 +110,8 @@ or a file descriptor number. =item $io->opened -Returns true if the object is currently a valid file descriptor. +Returns true if the object is currently a valid file descriptor, false +otherwise. =item $io->getline @@ -139,31 +140,37 @@ called C<format_write>. =item $io->error Returns a true value if the given handle has experienced any errors -since it was opened or since the last call to C<clearerr>. +since it was opened or since the last call to C<clearerr>, or if the +handle is invalid. It only returns false for a valid handle with no +outstanding errors. =item $io->clearerr -Clear the given handle's error indicator. +Clear the given handle's error indicator. Returns -1 if the handle is +invalid, 0 otherwise. =item $io->sync C<sync> synchronizes a file's in-memory state with that on the physical medium. C<sync> does not operate at the perlio api level, but -operates on the file descriptor, this means that any data held at the -perlio api level will not be synchronized. To synchronize data that is -buffered at the perlio api level you must use the flush method. C<sync> -is not implemented on all platforms. See L<fsync(3c)>. +operates on the file descriptor (similar to sysread, sysseek and +systell). This means that any data held at the perlio api level will not +be synchronized. To synchronize data that is buffered at the perlio api +level you must use the flush method. C<sync> is not implemented on all +platforms. Returns "0 but true" on success, C<undef> on error, C<undef> +for an invalid handle. See L<fsync(3c)>. =item $io->flush C<flush> causes perl to flush any buffered data at the perlio api level. Any unread data in the buffer will be discarded, and any unwritten data -will be written to the underlying file descriptor. +will be written to the underlying file descriptor. Returns "0 but true" +on success, C<undef> on error. =item $io->printflush ( ARGS ) Turns on autoflush, print ARGS and then restores the autoflush status of the -C<IO::Handle> object. +C<IO::Handle> object. Returns the return value from print. =item $io->blocking ( [ BOOL ] ) @@ -183,11 +190,18 @@ C<IO::Handle::setbuf> and C<IO::Handle::setvbuf> set the buffering policy for an IO::Handle. The calling sequences for the Perl functions are the same as their C counterparts--including the constants C<_IOFBF>, C<_IOLBF>, and C<_IONBF> for setvbuf()--except that the buffer parameter -specifies a scalar variable to use as a buffer. WARNING: A variable -used as a buffer by C<setbuf> or C<setvbuf> must not be modified in any -way until the IO::Handle is closed or C<setbuf> or C<setvbuf> is called -again, or memory corruption may result! Note that you need to import -the constants C<_IOFBF>, C<_IOLBF>, and C<_IONBF> explicitly. +specifies a scalar variable to use as a buffer. You should only +change the buffer before any I/O, or immediately after calling flush. + +WARNING: A variable used as a buffer by C<setbuf> or C<setvbuf> B<must not +be modified> in any way until the IO::Handle is closed or C<setbuf> or +C<setvbuf> is called again, or memory corruption may result! Remember that +the order of global destruction is undefined, so even if your buffer +variable remains in scope until program termination, it may be undefined +before the file IO::Handle is closed. Note that you need to import the +constants C<_IOFBF>, C<_IOLBF>, and C<_IONBF> explicitly. Like C, setbuf +returns nothing. setvbuf returns "0 but true", on success, C<undef> on +failure. Lastly, there is a special method for working under B<-T> and setuid/gid scripts: @@ -199,7 +213,8 @@ scripts: Marks the object as taint-clean, and as such data read from it will also be considered taint-clean. Note that this is a very trusting action to take, and appropriate consideration for the data source and potential -vulnerability should be kept in mind. +vulnerability should be kept in mind. Returns 0 on success, -1 if setting +the taint-clean flag failed. (eg invalid handle) =back diff --git a/ext/IO/lib/IO/Seekable.pm b/ext/IO/lib/IO/Seekable.pm index e09d48b9bf..243a971acc 100644 --- a/ext/IO/lib/IO/Seekable.pm +++ b/ext/IO/lib/IO/Seekable.pm @@ -18,19 +18,69 @@ C<IO::Seekable> does not have a constructor of its own as it is intended to be inherited by other C<IO::Handle> based objects. It provides methods which allow seeking of the file descriptors. -If the C functions fgetpos() and fsetpos() are available, then -C<$io-E<lt>getpos> returns an opaque value that represents the -current position of the IO::File, and C<$io-E<gt>setpos(POS)> uses -that value to return to a previously visited position. +=over 4 +=item $io->getpos + +Returns an opaque value that represents the current position of the +IO::File, or C<undef> if this is not possible (eg an unseekable stream such +as a terminal, pipe or socket). If the fgetpos() function is available in +your C library it is used to implements getpos, else perl emulates getpos +using C's ftell() function. + +=item $io->setpos + +Uses the value of a previous getpos call to return to a previously visited +position. Returns "0 but true" on success, C<undef> on failure. + +=back + See L<perlfunc> for complete descriptions of each of the following supported C<IO::Seekable> methods, which are just front ends for the corresponding built-in functions: - $io->seek( POS, WHENCE ) - $io->sysseek( POS, WHENCE ) - $io->tell +=over 4 + +=item $io->setpos ( POS, WHENCE ) + +Seek the IO::File to position POS, relative to WHENCE: + +=over 8 + +=item WHENCE=0 (SEEK_SET) + +POS is absolute position. (Seek relative to the start of the file) + +=item WHENCE=1 (SEEK_CUR) + +POS is an offset from the current position. (Seek relative to current) + +=item WHENCE=1 (SEEK_END) + +POS is an offset from the end of the file. (Seek relative to end) + +=back + +The SEEK_* constants can be imported from the C<Fcntl> module if you +don't wish to use the numbers C<0> C<1> or C<2> in your code. + +Returns C<1> upon success, C<0> otherwise. + +=item $io->sysseek( POS, WHENCE ) + +Similar to $io->seek, but sets the IO::File's position using the system +call lseek(2) directly, so will confuse most perl IO operators except +sysread and syswrite (see L<perlfunc> for full details) + +Returns the new position, or C<undef> on failure. A position +of zero is returned as the string C<"0 but true"> + +=item $io->tell + +Returns the IO::File's current position, or -1 on error. +=back + =head1 SEE ALSO L<perlfunc>, diff --git a/ext/IO/lib/IO/Select.pm b/ext/IO/lib/IO/Select.pm index e84b54f015..1a3a26fe6a 100644 --- a/ext/IO/lib/IO/Select.pm +++ b/ext/IO/lib/IO/Select.pm @@ -56,6 +56,7 @@ sub exists sub _fileno { my($self, $f) = @_; + return unless defined $f; $f = $f->[0] if ref($f) eq 'ARRAY'; ($f =~ /^\d+$/) ? $f : fileno($f); } diff --git a/ext/IPC/SysV/SysV.xs b/ext/IPC/SysV/SysV.xs index 38062e028b..4a10eb90e3 100644 --- a/ext/IPC/SysV/SysV.xs +++ b/ext/IPC/SysV/SysV.xs @@ -203,7 +203,7 @@ ftok(path, id) key_t k = ftok(path, id); ST(0) = k == (key_t) -1 ? &PL_sv_undef : sv_2mortal(newSViv(k)); #else - DIE(PL_no_func, "ftok"); + DIE(aTHX_ PL_no_func, "ftok"); #endif int diff --git a/ext/NDBM_File/NDBM_File.pm b/ext/NDBM_File/NDBM_File.pm index c9ef6993a8..99aae17d5c 100644 --- a/ext/NDBM_File/NDBM_File.pm +++ b/ext/NDBM_File/NDBM_File.pm @@ -10,7 +10,7 @@ require Tie::Hash; use XSLoader (); our @ISA = qw(Tie::Hash); -our $VERSION = "1.03"; +our $VERSION = "1.04"; XSLoader::load 'NDBM_File', $VERSION; diff --git a/ext/NDBM_File/NDBM_File.xs b/ext/NDBM_File/NDBM_File.xs index 49a1db5e56..c417eb693e 100644 --- a/ext/NDBM_File/NDBM_File.xs +++ b/ext/NDBM_File/NDBM_File.xs @@ -1,6 +1,11 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" +/* If using the DB3 emulation, ENTER is defined both + * by DB3 and Perl. We drop the Perl definition now. + * See also INSTALL section on DB3. + * -- Stanislav Brabec <utx@penguin.cz> */ +#undef ENTER #include <ndbm.h> typedef struct { diff --git a/ext/NDBM_File/typemap b/ext/NDBM_File/typemap index eeb5d59027..40b95f22c0 100644 --- a/ext/NDBM_File/typemap +++ b/ext/NDBM_File/typemap @@ -20,8 +20,14 @@ T_DATUM_K $var.dsize = (int)PL_na; T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); - $var.dptr = SvPV($arg, PL_na); - $var.dsize = (int)PL_na; + if (SvOK($arg)) { + $var.dptr = SvPV($arg, PL_na); + $var.dsize = (int)PL_na; + } + else { + $var.dptr = \"\"; + $var.dsize = 0; + } T_GDATUM UNIMPLEMENTED OUTPUT diff --git a/ext/ODBM_File/ODBM_File.pm b/ext/ODBM_File/ODBM_File.pm index 732ed60930..4244eb966f 100644 --- a/ext/ODBM_File/ODBM_File.pm +++ b/ext/ODBM_File/ODBM_File.pm @@ -6,7 +6,7 @@ require Tie::Hash; use XSLoader (); our @ISA = qw(Tie::Hash); -our $VERSION = "1.02"; +our $VERSION = "1.03"; XSLoader::load 'ODBM_File', $VERSION; diff --git a/ext/ODBM_File/ODBM_File.xs b/ext/ODBM_File/ODBM_File.xs index 150f2ef894..27174ef062 100644 --- a/ext/ODBM_File/ODBM_File.xs +++ b/ext/ODBM_File/ODBM_File.xs @@ -3,6 +3,11 @@ #include "XSUB.h" #ifdef I_DBM +/* If using the DB3 emulation, ENTER is defined both + * by DB3 and Perl. We drop the Perl definition now. + * See also INSTALL section on DB3. + * -- Stanislav Brabec <utx@penguin.cz> */ +# undef ENTER # include <dbm.h> #else # ifdef I_RPCSVC_DBM diff --git a/ext/ODBM_File/typemap b/ext/ODBM_File/typemap index 7c23815ec7..096427ea7f 100644 --- a/ext/ODBM_File/typemap +++ b/ext/ODBM_File/typemap @@ -20,8 +20,14 @@ T_DATUM_K $var.dsize = (int)PL_na; T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); - $var.dptr = SvPV($arg, PL_na); - $var.dsize = (int)PL_na; + if (SvOK($arg)) { + $var.dptr = SvPV($arg, PL_na); + $var.dsize = (int)PL_na; + } + else { + $var.dptr = \"\"; + $var.dsize = 0; + } T_GDATUM UNIMPLEMENTED OUTPUT diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index 841120c4c6..6a5e30dd17 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -2,18 +2,19 @@ package Opcode; require 5.005_64; +use strict; + our($VERSION, $XS_VERSION, @ISA, @EXPORT_OK); $VERSION = "1.04"; $XS_VERSION = "1.03"; -use strict; use Carp; use Exporter (); use XSLoader (); -@ISA = qw(Exporter); BEGIN { + @ISA = qw(Exporter); @EXPORT_OK = qw( opset ops_to_opset opset_to_ops opset_to_hex invert_opset diff --git a/ext/Opcode/Opcode.xs b/ext/Opcode/Opcode.xs index e191ec7c9c..04f7c3fa33 100644 --- a/ext/Opcode/Opcode.xs +++ b/ext/Opcode/Opcode.xs @@ -253,6 +253,12 @@ PPCODE: save_hptr(&PL_defstash); /* save current default stash */ /* the assignment to global defstash changes our sense of 'main' */ PL_defstash = gv_stashpv(Package, GV_ADDWARN); /* should exist already */ + if (strNE(HvNAME(PL_defstash),"main")) { + Safefree(HvNAME(PL_defstash)); + HvNAME(PL_defstash) = savepv("main"); /* make it think it's in main:: */ + hv_store(PL_defstash,"_",1,(SV *)PL_defgv,0); /* connect _ to global */ + SvREFCNT_inc((SV *)PL_defgv); /* want to keep _ around! */ + } save_hptr(&PL_curstash); PL_curstash = PL_defstash; diff --git a/ext/POSIX/Makefile.PL b/ext/POSIX/Makefile.PL index 55c5c1fbf3..73bb02dddb 100644 --- a/ext/POSIX/Makefile.PL +++ b/ext/POSIX/Makefile.PL @@ -2,12 +2,7 @@ use ExtUtils::MakeMaker; use Config; my @libs; if ($^O ne 'MSWin32') { - if ($Config{archname} =~ /RM\d\d\d-svr4/) { - @libs = ('LIBS' => ["-lm -lc -lposix -lcposix"]); - } - else { - @libs = ('LIBS' => ["-lm -lposix -lcposix"]); - } + @libs = ('LIBS' => ["-lm -lposix -lcposix"]); } WriteMakefile( NAME => 'POSIX', diff --git a/ext/POSIX/POSIX.pm b/ext/POSIX/POSIX.pm index 252e5bbad1..e1e6b28e40 100644 --- a/ext/POSIX/POSIX.pm +++ b/ext/POSIX/POSIX.pm @@ -734,16 +734,6 @@ sub setbuf { redef "IO::Handle::setbuf()"; } -sub setgid { - usage "setgid(gid)" if @_ != 1; - $( = $_[0]; -} - -sub setuid { - usage "setuid(uid)" if @_ != 1; - $< = $_[0]; -} - sub setvbuf { redef "IO::Handle::setvbuf()"; } diff --git a/ext/POSIX/POSIX.pod b/ext/POSIX/POSIX.pod index 314147cb2b..10199e9a2b 100644 --- a/ext/POSIX/POSIX.pod +++ b/ext/POSIX/POSIX.pod @@ -1008,9 +1008,12 @@ see L<perlre>. =item setgid -Sets the real group identifier for this process. -Identical to assigning a value to the Perl's builtin C<$)> variable, -see L<perlvar/$UID>. +Sets the real group identifier and the effective group identifier for +this process. Similar to assigning a value to the Perl's builtin +C<$)> variable, see L<perlvar/$GID>, except that the latter +will change only the real user identifier, and that the setgid() +uses only a single numeric argument, as opposed to a space-separated +list of numbers. =item setjmp @@ -1063,9 +1066,10 @@ setting the session identifier of the current process. =item setuid -Sets the real user identifier for this process. -Identical to assigning a value to the Perl's builtin C<$E<lt>> variable, -see L<perlvar/$UID>. +Sets the real user identifier and the effective user identifier for +this process. Similar to assigning a value to the Perl's builtin +C<$E<lt>> variable, see L<perlvar/$UID>, except that the latter +will change only the real user identifier. =item sigaction @@ -1434,7 +1438,9 @@ Returns a name for a temporary file. $tmpfile = POSIX::tmpnam(); -See also L<File::Temp>. +For security reasons, which are probably detailed in your system's +documentation for the C library tmpnam() function, this interface +should not be used; instead see L<File::Temp>. =item tolower diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index a536671afc..887fcbcbea 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -3940,6 +3940,14 @@ pathconf(filename, name) SysRet pause() +SysRet +setgid(gid) + Gid_t gid + +SysRet +setuid(uid) + Uid_t uid + SysRetLong sysconf(name) int name @@ -3947,3 +3955,4 @@ sysconf(name) char * ttyname(fd) int fd + diff --git a/ext/POSIX/hints/svr4.pl b/ext/POSIX/hints/svr4.pl new file mode 100644 index 0000000000..07f2cb0412 --- /dev/null +++ b/ext/POSIX/hints/svr4.pl @@ -0,0 +1,12 @@ +# NCR MP-RAS. Thanks to Doug Hendricks for this info. +# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0' +# This system needs to explicitly link against -lmw to pull in some +# symbols such as _mwoflocheckl and possibly others. +# A. Dougherty Thu Dec 7 11:55:28 EST 2000 +if ($Config{'archname'} =~ /3441-svr4/) { + $self->{LIBS} = ['-lm -posix -lcposix -lmw']; +} +# Not sure what OS this one is. +elsif ($Config{archname} =~ /RM\d\d\d-svr4/) { + $self->{LIBS} = ['-lm -lc -lposix -lcposix']; +} diff --git a/ext/POSIX/typemap b/ext/POSIX/typemap index baf9bfc051..d54d5d11a0 100644 --- a/ext/POSIX/typemap +++ b/ext/POSIX/typemap @@ -3,6 +3,7 @@ pid_t T_NV Uid_t T_NV Time_t T_NV Gid_t T_NV +Uid_t T_NV Off_t T_NV Dev_t T_NV NV T_NV diff --git a/ext/SDBM_File/Makefile.PL b/ext/SDBM_File/Makefile.PL index a1debb92a3..132bdadf26 100644 --- a/ext/SDBM_File/Makefile.PL +++ b/ext/SDBM_File/Makefile.PL @@ -1,4 +1,5 @@ use ExtUtils::MakeMaker; +use Config; # The existence of the ./sdbm/Makefile.PL file causes MakeMaker # to automatically include Makefile code for the targets @@ -21,18 +22,26 @@ WriteMakefile( sub MY::postamble { if ($^O =~ /MSWin32/ && Win32::IsWin95()) { - # XXX: dmake-specific, like rest of Win95 port - return - ' + if ($Config{'make'} =~ /dmake/i) { + # dmake-specific + return <<EOT; $(MYEXTLIB): sdbm/Makefile @[ cd sdbm $(MAKE) all cd .. ] -'; - } - elsif ($^O ne 'VMS') { +EOT + } elsif ($Config{'make'} =~ /nmake/i) { + # + return <<EOT; +$(MYEXTLIB): sdbm/Makefile + cd sdbm + $(MAKE) all + cd .. +EOT + } +} elsif ($^O ne 'VMS') { ' $(MYEXTLIB): sdbm/Makefile cd sdbm && $(MAKE) all diff --git a/ext/SDBM_File/SDBM_File.pm b/ext/SDBM_File/SDBM_File.pm index b3502b9837..4d1411bf34 100644 --- a/ext/SDBM_File/SDBM_File.pm +++ b/ext/SDBM_File/SDBM_File.pm @@ -6,7 +6,7 @@ require Tie::Hash; use XSLoader (); our @ISA = qw(Tie::Hash); -our $VERSION = "1.02" ; +our $VERSION = "1.03" ; XSLoader::load 'SDBM_File', $VERSION; diff --git a/ext/SDBM_File/sdbm/sdbm.c b/ext/SDBM_File/sdbm/sdbm.c index 64c75cbb20..d41c770dfb 100644 --- a/ext/SDBM_File/sdbm/sdbm.c +++ b/ext/SDBM_File/sdbm/sdbm.c @@ -283,6 +283,10 @@ makroom(register DBM *db, long int hash, int need) { long newp; char twin[PBLKSIZ]; +#if defined(DOSISH) || defined(WIN32) + char zer[PBLKSIZ]; + long oldtail; +#endif char *pag = db->pagbuf; char *New = twin; register int smax = SPLTMAX; @@ -305,6 +309,23 @@ makroom(register DBM *db, long int hash, int need) * still looking at the page of interest. current page is not updated * here, as sdbm_store will do so, after it inserts the incoming pair. */ + +#if defined(DOSISH) || defined(WIN32) + /* + * Fill hole with 0 if made it. + * (hole is NOT read as 0) + */ + oldtail = lseek(db->pagf, 0L, SEEK_END); + memset(zer, 0, PBLKSIZ); + while (OFF_PAG(newp) > oldtail) { + if (lseek(db->pagf, 0L, SEEK_END) < 0 || + write(db->pagf, zer, PBLKSIZ) < 0) { + + return 0; + } + oldtail += PBLKSIZ; + } +#endif if (hash & (db->hmask + 1)) { if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) diff --git a/ext/SDBM_File/typemap b/ext/SDBM_File/typemap index eeb5d59027..40b95f22c0 100644 --- a/ext/SDBM_File/typemap +++ b/ext/SDBM_File/typemap @@ -20,8 +20,14 @@ T_DATUM_K $var.dsize = (int)PL_na; T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); - $var.dptr = SvPV($arg, PL_na); - $var.dsize = (int)PL_na; + if (SvOK($arg)) { + $var.dptr = SvPV($arg, PL_na); + $var.dsize = (int)PL_na; + } + else { + $var.dptr = \"\"; + $var.dsize = 0; + } T_GDATUM UNIMPLEMENTED OUTPUT diff --git a/ext/Storable/ChangeLog b/ext/Storable/ChangeLog index 049ce29583..92789b59a4 100644 --- a/ext/Storable/ChangeLog +++ b/ext/Storable/ChangeLog @@ -1,3 +1,50 @@ +Wed Jan 3 10:43:18 MET 2001 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Removed spurious 'clean' entry in Makefile.PL. + + Added CAN_FLOCK to determine whether we can flock() or not, + by inspecting Perl's configuration parameters, as determined + by Configure. + + Trace offending package when overloading cannot be restored + on a scalar. + + Made context cleanup safer to avoid dup freeing, mostly in the + presence of repeated exceptions during store/retrieve (which can + cause memory leaks anyway, so it's just additional safety, not a + definite fix). + +Sun Nov 5 18:23:48 MET 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Version 1.0.6. + + Fixed severe "object lost" bug for STORABLE_freeze returns, + when refs to lexicals, taken within the hook, were to be + serialized by Storable. Enhanced the t/recurse.t test to + stress hook a little more with refs to lexicals. + +Thu Oct 26 19:14:38 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> + +. Description: + + Version 1.0.5. + + Documented that store() and retrieve() can return undef. + That is, the error reporting is not always made via exceptions, + as the paragraph on error reporting was implying. + + Auto requires module of blessed ref when STORABLE_thaw misses. + When the Storable engine looks for the STORABLE_thaw hook and + does not find it, it now tries to require the package into which + the blessed reference is. + + Just check $^O, in t/lock.t: there's no need to pull the whole + Config module for that. + Fri Sep 29 21:52:29 MEST 2000 Raphael Manfredi <Raphael_Manfredi@pobox.com> . Description: diff --git a/ext/Storable/Makefile.PL b/ext/Storable/Makefile.PL index 7ed71e69a3..c8151f3083 100644 --- a/ext/Storable/Makefile.PL +++ b/ext/Storable/Makefile.PL @@ -1,4 +1,4 @@ -# $Id: Makefile.PL,v 1.0 2000/09/01 19:40:41 ram Exp $ +# $Id: Makefile.PL,v 1.0.1.1 2001/01/03 09:38:39 ram Exp $ # # Copyright (c) 1995-2000, Raphael Manfredi # @@ -6,6 +6,9 @@ # in the README file that comes with the distribution. # # $Log: Makefile.PL,v $ +# Revision 1.0.1.1 2001/01/03 09:38:39 ram +# patch7: removed spurious 'clean' entry +# # Revision 1.0 2000/09/01 19:40:41 ram # Baseline for first official release. # @@ -19,6 +22,5 @@ WriteMakefile( 'MAN3PODS' => {}, 'VERSION_FROM' => 'Storable.pm', 'dist' => { SUFFIX => 'gz', COMPRESS => 'gzip -f' }, - 'clean' => {'FILES' => '*%'}, ); diff --git a/ext/Storable/Storable.pm b/ext/Storable/Storable.pm index 76c320923b..06c05d4fe9 100644 --- a/ext/Storable/Storable.pm +++ b/ext/Storable/Storable.pm @@ -1,4 +1,4 @@ -;# $Id: Storable.pm,v 1.0 2000/09/01 19:40:41 ram Exp $ +;# $Id: Storable.pm,v 1.0.1.7 2001/01/03 09:39:02 ram Exp $ ;# ;# Copyright (c) 1995-2000, Raphael Manfredi ;# @@ -6,6 +6,21 @@ ;# in the README file that comes with the distribution. ;# ;# $Log: Storable.pm,v $ +;# Revision 1.0.1.7 2001/01/03 09:39:02 ram +;# patch7: added CAN_FLOCK to determine whether we can flock() or not +;# +;# Revision 1.0.1.6 2000/11/05 17:20:25 ram +;# patch6: increased version number +;# +;# Revision 1.0.1.5 2000/10/26 17:10:18 ram +;# patch5: documented that store() and retrieve() can return undef +;# patch5: added paragraph explaining the auto require for thaw hooks +;# +;# Revision 1.0.1.4 2000/10/23 18:02:57 ram +;# patch4: protected calls to flock() for dos platform +;# patch4: added logcarp emulation if they don't have Log::Agent +;# +;# $Log: Storable.pm,v $ ;# Revision 1.0 2000/09/01 19:40:41 ram ;# Baseline for first official release. ;# @@ -26,7 +41,7 @@ package Storable; @ISA = qw(Exporter DynaLoader); use AutoLoader; use vars qw($forgive_me $VERSION); -$VERSION = '1.003'; +$VERSION = '1.007'; *AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr... # @@ -41,6 +56,10 @@ unless (defined @Log::Agent::EXPORT) { require Carp; Carp::croak(@_); } + sub logcarp { + require Carp; + Carp::carp(@_); + } }; } @@ -61,9 +80,25 @@ BEGIN { } sub logcroak; +sub logcarp; sub retrieve_fd { &fd_retrieve } # Backward compatibility +# +# Determine whether locking is possible, but only when needed. +# + +my $CAN_FLOCK; + +sub CAN_FLOCK { + return $CAN_FLOCK if defined $CAN_FLOCK; + require Config; import Config; + return $CAN_FLOCK = + $Config{'d_flock'} || + $Config{'d_fcntl_can_lock'} || + $Config{'d_lockf'}; +} + bootstrap Storable; 1; __END__ @@ -118,6 +153,10 @@ sub _store { open(FILE, ">$file") || logcroak "can't create $file: $!"; binmode FILE; # Archaic systems... if ($use_locking) { + unless (&CAN_FLOCK) { + logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O"; + return undef; + } flock(FILE, LOCK_EX) || logcroak "can't get exclusive lock on $file: $!"; truncate FILE, 0; @@ -234,7 +273,12 @@ sub _retrieve { my $self; my $da = $@; # Could be from exception handler if ($use_locking) { - flock(FILE, LOCK_SH) || logcroak "can't get shared lock on $file: $!"; + unless (&CAN_FLOCK) { + logcarp "Storable::lock_retrieve: fcntl/flock emulation broken on $^O"; + return undef; + } + flock(FILE, LOCK_SH) || + logcroak "can't get shared lock on $file: $!"; # Unlocking will happen when FILE is closed } eval { $self = pretrieve(*FILE) }; # Call C routine @@ -435,6 +479,9 @@ those exceptions. When Storable croaks, it tries to report the error via the C<logcroak()> routine from the C<Log::Agent> package, if it is available. +Normal errors are reported by having store() or retrieve() return C<undef>. +Such errors are usually I/O errors (or truncated stream errors at retrieval). + =head1 WIZARDS ONLY =head2 Hooks @@ -514,6 +561,13 @@ and there may be an optional list of references, in the same order you gave them at serialization time, pointing to the deserialized objects (which have been processed courtesy of the Storable engine). +When the Storable engine does not find any C<STORABLE_thaw> hook routine, +it tries to load the class by requiring the package dynamically (using +the blessed package name), and then re-attempts the lookup. If at that +time the hook cannot be located, the engine croaks. Note that this mechanism +will fail if you define several classes in the same file, but perlmod(1) +warned you. + It is up to you to use these information to populate I<obj> the way you want. Returned value: none. diff --git a/ext/Storable/Storable.xs b/ext/Storable/Storable.xs index 1c412b5d20..9378001cc4 100644 --- a/ext/Storable/Storable.xs +++ b/ext/Storable/Storable.xs @@ -3,7 +3,7 @@ */ /* - * $Id: Storable.xs,v 1.0 2000/09/01 19:40:41 ram Exp $ + * $Id: Storable.xs,v 1.0.1.6 2001/01/03 09:40:40 ram Exp $ * * Copyright (c) 1995-2000, Raphael Manfredi * @@ -11,6 +11,21 @@ * in the README file that comes with the distribution. * * $Log: Storable.xs,v $ + * Revision 1.0.1.6 2001/01/03 09:40:40 ram + * patch7: prototype and casting cleanup + * patch7: trace offending package when overloading cannot be restored + * patch7: made context cleanup safer to avoid dup freeing + * + * Revision 1.0.1.5 2000/11/05 17:21:24 ram + * patch6: fixed severe "object lost" bug for STORABLE_freeze returns + * + * Revision 1.0.1.4 2000/10/26 17:11:04 ram + * patch5: auto requires module of blessed ref when STORABLE_thaw misses + * + * Revision 1.0.1.3 2000/09/29 19:49:57 ram + * patch3: avoid using "tainted" and "dirty" since Perl remaps them via cpp + * + * $Log: Storable.xs,v $ * Revision 1.0 2000/09/01 19:40:41 ram * Baseline for first official release. * @@ -87,14 +102,21 @@ typedef double NV; /* Older perls lack the NV type */ #endif #ifdef DEBUGME -#ifndef DASSERT -#define DASSERT -#endif -#define TRACEME(x) do { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); } while (0) +/* + * TRACEME() will only output things when the $Storable::DEBUGME is true. + */ + +#define TRACEME(x) do { \ + if (SvTRUE(perl_get_sv("Storable::DEBUGME", TRUE))) \ + { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); } \ +} while (0) #else #define TRACEME(x) #endif +#ifndef DASSERT +#define DASSERT +#endif #ifdef DASSERT #define ASSERT(x,y) do { \ if (!(x)) { \ @@ -235,6 +257,7 @@ typedef struct stcxt { int entry; /* flags recursion */ int optype; /* type of traversal operation */ HV *hseen; /* which objects have been seen, store time */ + AV *hook_seen; /* which SVs were returned by STORABLE_freeze() */ AV *aseen; /* which objects have been seen, retrieve time */ HV *hclass; /* which classnames have been seen, store time */ AV *aclass; /* which classnames have been seen, retrieve time */ @@ -652,7 +675,7 @@ static char magicstr[] = "pst0"; /* Used as a magic number */ #define GETMARK(x) do { \ if (!cxt->fio) \ MBUF_GETC(x); \ - else if ((x = PerlIO_getc(cxt->fio)) == EOF) \ + else if ((int) (x = PerlIO_getc(cxt->fio)) == EOF) \ return (SV *) 0; \ } while (0) @@ -740,14 +763,14 @@ static int store_tied_item(stcxt_t *cxt, SV *sv); static int store_other(stcxt_t *cxt, SV *sv); static int store_blessed(stcxt_t *cxt, SV *sv, int type, HV *pkg); -static int (*sv_store[])() = { - store_ref, /* svis_REF */ - store_scalar, /* svis_SCALAR */ - store_array, /* svis_ARRAY */ - store_hash, /* svis_HASH */ - store_tied, /* svis_TIED */ - store_tied_item, /* svis_TIED_ITEM */ - store_other, /* svis_OTHER */ +static int (*sv_store[])(stcxt_t *cxt, SV *sv) = { + store_ref, /* svis_REF */ + store_scalar, /* svis_SCALAR */ + (int (*)(stcxt_t *cxt, SV *sv)) store_array, /* svis_ARRAY */ + (int (*)(stcxt_t *cxt, SV *sv)) store_hash, /* svis_HASH */ + store_tied, /* svis_TIED */ + store_tied_item, /* svis_TIED_ITEM */ + store_other, /* svis_OTHER */ }; #define SV_STORE(x) (*sv_store[x]) @@ -773,7 +796,7 @@ static SV *retrieve_tied_hash(stcxt_t *cxt); static SV *retrieve_tied_scalar(stcxt_t *cxt); static SV *retrieve_other(stcxt_t *cxt); -static SV *(*sv_old_retrieve[])() = { +static SV *(*sv_old_retrieve[])(stcxt_t *cxt) = { 0, /* SX_OBJECT -- entry unused dynamically */ retrieve_lscalar, /* SX_LSCALAR */ old_retrieve_array, /* SX_ARRAY -- for pre-0.6 binaries */ @@ -814,7 +837,7 @@ static SV *retrieve_overloaded(stcxt_t *cxt); static SV *retrieve_tied_key(stcxt_t *cxt); static SV *retrieve_tied_idx(stcxt_t *cxt); -static SV *(*sv_retrieve[])() = { +static SV *(*sv_retrieve[])(stcxt_t *cxt) = { 0, /* SX_OBJECT -- entry unused dynamically */ retrieve_lscalar, /* SX_LSCALAR */ retrieve_array, /* SX_ARRAY */ @@ -946,6 +969,15 @@ static void init_store_context( */ cxt->hook = newHV(); /* Table where hooks are cached */ + + /* + * The `hook_seen' array keeps track of all the SVs returned by + * STORABLE_freeze hooks for us to serialize, so that they are not + * reclaimed until the end of the serialization process. Each SV is + * only stored once, the first time it is seen. + */ + + cxt->hook_seen = newAV(); /* Lists SVs returned by STORABLE_freeze */ } /* @@ -975,16 +1007,41 @@ static void clean_store_context(stcxt_t *cxt) /* * And now dispose of them... + * + * The surrounding if() protection has been added because there might be + * some cases where this routine is called more than once, during + * exceptionnal events. This was reported by Marc Lehmann when Storable + * is executed from mod_perl, and the fix was suggested by him. + * -- RAM, 20/12/2000 */ - hv_undef(cxt->hseen); - sv_free((SV *) cxt->hseen); + if (cxt->hseen) { + HV *hseen = cxt->hseen; + cxt->hseen = 0; + hv_undef(hseen); + sv_free((SV *) hseen); + } + + if (cxt->hclass) { + HV *hclass = cxt->hclass; + cxt->hclass = 0; + hv_undef(hclass); + sv_free((SV *) hclass); + } - hv_undef(cxt->hclass); - sv_free((SV *) cxt->hclass); + if (cxt->hook) { + HV *hook = cxt->hook; + cxt->hook = 0; + hv_undef(hook); + sv_free((SV *) hook); + } - hv_undef(cxt->hook); - sv_free((SV *) cxt->hook); + if (cxt->hook_seen) { + AV *hook_seen = cxt->hook_seen; + cxt->hook_seen = 0; + av_undef(hook_seen); + sv_free((SV *) hook_seen); + } cxt->entry = 0; cxt->s_dirty = 0; @@ -1039,17 +1096,33 @@ static void clean_retrieve_context(stcxt_t *cxt) ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()")); - av_undef(cxt->aseen); - sv_free((SV *) cxt->aseen); + if (cxt->aseen) { + AV *aseen = cxt->aseen; + cxt->aseen = 0; + av_undef(aseen); + sv_free((SV *) aseen); + } - av_undef(cxt->aclass); - sv_free((SV *) cxt->aclass); + if (cxt->aclass) { + AV *aclass = cxt->aclass; + cxt->aclass = 0; + av_undef(aclass); + sv_free((SV *) aclass); + } - hv_undef(cxt->hook); - sv_free((SV *) cxt->hook); + if (cxt->hook) { + HV *hook = cxt->hook; + cxt->hook = 0; + hv_undef(hook); + sv_free((SV *) hook); + } - if (cxt->hseen) - sv_free((SV *) cxt->hseen); /* optional HV, for backward compat. */ + if (cxt->hseen) { + HV *hseen = cxt->hseen; + cxt->hseen = 0; + hv_undef(hseen); + sv_free((SV *) hseen); /* optional HV, for backward compat. */ + } cxt->entry = 0; cxt->s_dirty = 0; @@ -1071,6 +1144,8 @@ stcxt_t *cxt; clean_retrieve_context(cxt); else clean_store_context(cxt); + + ASSERT(!cxt->s_dirty, ("context is clean")); } /* @@ -1223,6 +1298,19 @@ static void pkg_hide( } /* + * pkg_uncache + * + * Discard cached value: a whole fetch loop will be retried at next lookup. + */ +static void pkg_uncache( + HV *cache, + HV *pkg, + char *method) +{ + (void) hv_delete(cache, HvNAME(pkg), strlen(HvNAME(pkg)), G_DISCARD); +} + +/* * pkg_can * * Our own "UNIVERSAL::can", which caches results. @@ -2096,11 +2184,14 @@ static int store_hook( for (i = 1; i < count; i++) { SV **svh; - SV *xsv = ary[i]; + SV *rsv = ary[i]; + SV *xsv; + AV *av_hook = cxt->hook_seen; - if (!SvROK(xsv)) - CROAK(("Item #%d from hook in %s is not a reference", i, class)); - xsv = SvRV(xsv); /* Follow ref to know what to look for */ + if (!SvROK(rsv)) + CROAK(("Item #%d returned by STORABLE_freeze " + "for %s is not a reference", i, class)); + xsv = SvRV(rsv); /* Follow ref to know what to look for */ /* * Look in hseen and see if we have a tag already. @@ -2136,11 +2227,34 @@ static int store_hook( CROAK(("Could not serialize item #%d from hook in %s", i, class)); /* - * Replace entry with its tag (not a real SV, so no refcnt increment) + * It was the first time we serialized `xsv'. + * + * Keep this SV alive until the end of the serialization: if we + * disposed of it right now by decrementing its refcount, and it was + * a temporary value, some next temporary value allocated during + * another STORABLE_freeze might take its place, and we'd wrongly + * assume that new SV was already serialized, based on its presence + * in cxt->hseen. + * + * Therefore, push it away in cxt->hook_seen. */ + av_store(av_hook, AvFILLp(av_hook)+1, SvREFCNT_inc(xsv)); + sv_seen: - SvREFCNT_dec(xsv); + /* + * Dispose of the REF they returned. If we saved the `xsv' away + * in the array of returned SVs, that will not cause the underlying + * referenced SV to be reclaimed. + */ + + ASSERT(SvREFCNT(xsv) > 1, ("SV will survive disposal of its REF")); + SvREFCNT_dec(rsv); /* Dispose of reference */ + + /* + * Replace entry with its tag (not a real SV, so no refcnt increment) + */ + ary[i] = *svh; TRACEME(("listed object %d at 0x%"UVxf" is tag #%"UVuf, i-1, PTR2UV(xsv), PTR2UV(*svh))); @@ -3131,8 +3245,37 @@ static SV *retrieve_hook(stcxt_t *cxt) BLESS(sv, class); hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw"); - if (!hook) - CROAK(("No STORABLE_thaw defined for objects of class %s", class)); + if (!hook) { + /* + * Hook not found. Maybe they did not require the module where this + * hook is defined yet? + * + * If the require below succeeds, we'll be able to find the hook. + * Still, it only works reliably when each class is defined in a + * file of its own. + */ + + SV *psv = newSVpvn("require ", 8); + sv_catpv(psv, class); + + TRACEME(("No STORABLE_thaw defined for objects of class %s", class)); + TRACEME(("Going to require module '%s' with '%s'", class, SvPVX(psv))); + + perl_eval_sv(psv, G_DISCARD); + sv_free(psv); + + /* + * We cache results of pkg_can, so we need to uncache before attempting + * the lookup again. + */ + + pkg_uncache(cxt->hook, SvSTASH(sv), "STORABLE_thaw"); + hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw"); + + if (!hook) + CROAK(("No STORABLE_thaw defined for objects of class %s " + "(even after a \"require %s;\")", class, class)); + } /* * If we don't have an `av' yet, prepare one. @@ -3273,9 +3416,10 @@ static SV *retrieve_overloaded(stcxt_t *cxt) stash = (HV *) SvSTASH (sv); if (!stash || !Gv_AMG(stash)) - CROAK(("Cannot restore overloading on %s(0x%"UVxf")", + CROAK(("Cannot restore overloading on %s(0x%"UVxf") (package %s)", sv_reftype(sv, FALSE), - PTR2UV(sv))); + PTR2UV(sv), + stash ? HvNAME(stash) : "<unknown>")); SvAMAGIC_on(rv); diff --git a/ext/Sys/Syslog/Syslog.pm b/ext/Sys/Syslog/Syslog.pm index c7ce3ded96..71f5b828d0 100644 --- a/ext/Sys/Syslog/Syslog.pm +++ b/ext/Sys/Syslog/Syslog.pm @@ -264,7 +264,9 @@ sub xlate { $name = uc $name; $name = "LOG_$name" unless $name =~ /^LOG_/; $name = "Sys::Syslog::$name"; - eval { &$name } || -1; + # Can't have just eval { &$name } || -1 because some LOG_XXX may be zero. + my $value = eval { &$name }; + defined $value ? $value : -1; } sub connect { @@ -274,8 +276,8 @@ sub connect { ($host) = $host_uniq =~ /([A-Za-z0-9_.-]+)/; # allow FQDN (inc _) } unless ( $sock_type ) { - my $udp = getprotobyname('udp'); - my $syslog = getservbyname('syslog','udp'); + my $udp = getprotobyname('udp') || croak "getprotobyname failed for udp"; + my $syslog = getservbyname('syslog','udp') || croak "getservbyname failed"; my $this = sockaddr_in($syslog, INADDR_ANY); my $that = sockaddr_in($syslog, inet_aton($host) || croak "Can't lookup $host"); socket(SYSLOG,AF_INET,SOCK_DGRAM,$udp) || croak "socket: $!"; diff --git a/ext/Thread/Thread.pm b/ext/Thread/Thread.pm index c752e3d6dd..f8a8a26bbc 100644 --- a/ext/Thread/Thread.pm +++ b/ext/Thread/Thread.pm @@ -21,6 +21,11 @@ Thread - manipulate threads in Perl (EXPERIMENTAL, subject to change) $result = $t->join; $result = $t->eval; $t->detach; + $flags = $t->flags; + + if ($t->done) { + $t->join; + } if($t->equal($another_thread)) { # ... @@ -181,6 +186,17 @@ increasing integer assigned when a thread is created. The main thread of a program will have a tid of zero, while subsequent threads will have tids assigned starting with one. +=item flags + +The C<flags> method returns the flags for the thread. This is the +integer value corresponding to the internal flags for the thread, and +the value may not be all that meaningful to you. + +=item done + +The C<done> method returns true if the thread you're checking has +finished, and false otherwise. + =back =head1 LIMITATIONS diff --git a/ext/Thread/Thread.xs b/ext/Thread/Thread.xs index 17e5aefd04..c117c60a42 100644 --- a/ext/Thread/Thread.xs +++ b/ext/Thread/Thread.xs @@ -98,7 +98,6 @@ threadstart(void *arg) DEBUG_S(PerlIO_printf(Perl_debug_log, "new thread %p waiting to start\n", thr)); - /* Don't call *anything* requiring dTHR until after PERL_SET_THX() */ /* * Wait until our creator releases us. If we didn't do this, then * it would be potentially possible for out thread to carry on and @@ -116,7 +115,6 @@ threadstart(void *arg) */ PERL_SET_THX(thr); - /* Only now can we use SvPEEK (which calls sv_newmortal which does dTHR) */ DEBUG_S(PerlIO_printf(Perl_debug_log, "new thread %p starting at %s\n", thr, SvPEEK(TOPs))); @@ -177,7 +175,7 @@ threadstart(void *arg) Safefree(PL_savestack); Safefree(PL_retstack); Safefree(PL_tmps_stack); - Safefree(PL_ofs); + SvREFCNT_dec(PL_ofs_sv); SvREFCNT_dec(PL_rs); SvREFCNT_dec(PL_nrs); @@ -191,6 +189,7 @@ threadstart(void *arg) Safefree(PL_reg_poscache); MUTEX_LOCK(&thr->mutex); + thr->thr_done = 1; DEBUG_S(PerlIO_printf(Perl_debug_log, "%p: threadstart finishing: state is %u\n", thr, ThrSTATE(thr))); @@ -448,6 +447,14 @@ flags(t) #endif void +done(t) + Thread t + PPCODE: +#ifdef USE_THREADS + PUSHs(t->thr_done ? &PL_sv_yes : &PL_sv_no); +#endif + +void self(classname) char * classname PREINIT: diff --git a/ext/re/Makefile.PL b/ext/re/Makefile.PL index bc31b2c2cc..b8d25bd0d6 100644 --- a/ext/re/Makefile.PL +++ b/ext/re/Makefile.PL @@ -1,12 +1,15 @@ use ExtUtils::MakeMaker; use File::Spec; +use Config; + +my $object = 're_exec$(OBJ_EXT) re_comp$(OBJ_EXT) re$(OBJ_EXT)'; WriteMakefile( NAME => 're', VERSION_FROM => 're.pm', MAN3PODS => {}, # Pods will be built by installman. XSPROTOARG => '-noprototypes', - OBJECT => 're_exec$(OBJ_EXT) re_comp$(OBJ_EXT) re$(OBJ_EXT)', + OBJECT => $object, DEFINE => '-DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG', clean => { FILES => '*$(OBJ_EXT) *.c ../../lib/re.pm' }, ); diff --git a/ext/re/hints/aix.pl b/ext/re/hints/aix.pl new file mode 100644 index 0000000000..4fbfefd735 --- /dev/null +++ b/ext/re/hints/aix.pl @@ -0,0 +1,22 @@ +# Add explicit link to deb.o to pick up .Perl_deb symbol which is not +# mentioned in perl.exp for earlier cc (xlc) versions in at least +# non DEBUGGING builds +# Peter Prymmer <pvhp@best.com> + +use Config; + +if ($^O eq 'aix' && defined($Config{'ccversion'}) && + ( $Config{'ccversion'} =~ /^3\.\d/ + # needed for at least these versions: + # $Config{'ccversion'} eq '3.6.6.0' + # $Config{'ccversion'} eq '3.6.4.0' + # $Config{'ccversion'} eq '3.1.4.0' AIX 4.2 + # $Config{'ccversion'} eq '3.1.4.10' AIX 4.2 + # $Config{'ccversion'} eq '3.1.3.3' + || + $Config{'ccversion'} =~ /^4\.4\.0\.[0-3]/ + ) + ) { + $self->{OBJECT} .= ' ../../deb$(OBJ_EXT)'; +} + diff --git a/ext/re/re.xs b/ext/re/re.xs index 04a5fdc742..25c2a90d60 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -25,7 +25,6 @@ static int oldfl; static void deinstall(pTHX) { - dTHR; PL_regexecp = Perl_regexec_flags; PL_regcompp = Perl_pregcomp; PL_regint_start = Perl_re_intuit_start; @@ -39,7 +38,6 @@ deinstall(pTHX) static void install(pTHX) { - dTHR; PL_colorset = 0; /* Allow reinspection of ENV. */ PL_regexecp = &my_regexec; PL_regcompp = &my_regcomp; |