summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>2002-02-21 16:33:37 -0500
committerJarkko Hietaniemi <jhi@iki.fi>2002-02-22 01:56:06 +0000
commit206947d2c0ace466f6b1e79f9bf44a86d72fb50d (patch)
treea6afc3fc8dff78ce751dd986cce9b27dcb304cca
parent445e6c975b30ffac31340f4bbc1d4513954992f8 (diff)
downloadperl-206947d2c0ace466f6b1e79f9bf44a86d72fb50d.tar.gz
pack with a human face: the sequel
Message-ID: <20020221213337.A23848@math.ohio-state.edu> p4raw-id: //depot/perl@14824
-rw-r--r--embed.fnc1
-rw-r--r--embed.h2
-rw-r--r--pod/perldiag.pod6
-rw-r--r--pod/perlfunc.pod18
-rw-r--r--pp_pack.c200
-rw-r--r--proto.h1
-rwxr-xr-xt/op/pack.t61
7 files changed, 267 insertions, 22 deletions
diff --git a/embed.fnc b/embed.fnc
index 60cd3e08e4..fbc9099298 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1067,6 +1067,7 @@ s |int |div128 |SV *pnum|bool *done
s |char * |next_symbol |char *pat|char *patend
s |I32 |find_count |char **ppat|char *patend|int *star
s |char * |group_end |char *pat|char *patend|char ender
+s |I32 |measure_struct |char *pat|char *patend
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
diff --git a/embed.h b/embed.h
index 40bfb28944..d7e137a35e 100644
--- a/embed.h
+++ b/embed.h
@@ -1000,6 +1000,7 @@
#define next_symbol S_next_symbol
#define find_count S_find_count
#define group_end S_group_end
+#define measure_struct S_measure_struct
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
#define docatch S_docatch
@@ -2544,6 +2545,7 @@
#define next_symbol(a,b) S_next_symbol(aTHX_ a,b)
#define find_count(a,b,c) S_find_count(aTHX_ a,b,c)
#define group_end(a,b,c) S_group_end(aTHX_ a,b,c)
+#define measure_struct(a,b) S_measure_struct(aTHX_ a,b)
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
#define docatch(a) S_docatch(aTHX_ a)
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 0c87d94643..6d8e9407f7 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2313,6 +2313,12 @@ supplied. See L<perlform>.
of Perl. Check the #! line, or manually feed your script into Perl
yourself.
+=item %s not allowed in length fields
+
+(F) The count in the (un)pack template may be replaced by C<[TEMPLATE]> only if
+C<TEMPLATE> always matches the same amount of packed bytes. Redesign
+the template.
+
=item no UTC offset information; assuming local time is UTC
(S) A warning peculiar to VMS. Perl was unable to find the local
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index 56ad58f474..dfacad5146 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -3169,7 +3169,7 @@ of values, as follows:
x A null byte.
X Back up a byte.
@ Null fill to absolute position.
- ( Beginning of a ()-group.
+ ( Start of a ()-group.
The following rules apply:
@@ -3179,12 +3179,16 @@ The following rules apply:
Each letter may optionally be followed by a number giving a repeat
count. With all types except C<a>, C<A>, C<Z>, C<b>, C<B>, C<h>,
-C<H>, and C<P> the pack function will gobble up that many values from
-the LIST. A C<*> for the repeat count means to use however many items are
-left, except for C<@>, C<x>, C<X>, where it is equivalent
-to C<0>, and C<u>, where it is equivalent to 1 (or 45, what is the
-same). A numeric repeat count may optionally be enclosed in brackets, as in
-C<pack 'C[80]', @arr>.
+C<H>, C<@>, C<x>, C<X> and C<P> the pack function will gobble up that
+many values from the LIST. A C<*> for the repeat count means to use
+however many items are left, except for C<@>, C<x>, C<X>, where it is
+equivalent to C<0>, and C<u>, where it is equivalent to 1 (or 45, what
+is the same). A numeric repeat count may optionally be enclosed in
+brackets, as in C<pack 'C[80]', @arr>.
+
+One can replace the numeric repeat count by a template enclosed in brackets;
+then the packed length of this template in bytes is used as a count.
+For example, C<x[L]> skips a long (it skips the number of bytes in a long).
When used with C<Z>, C<*> results in the addition of a trailing null
byte (so the packed result will be one longer than the byte C<length>
diff --git a/pp_pack.c b/pp_pack.c
index 6160e64c94..777969c954 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -142,10 +142,177 @@ S_group_end(pTHX_ register char *pat, register char *patend, char ender)
continue;
} else if (c == '(')
pat = group_end(pat, patend, ')') + 1;
+ else if (c == '[')
+ pat = group_end(pat, patend, ']') + 1;
}
croak("No group ending character `%c' found", ender);
}
+/* Returns the sizeof() struct described by pat */
+I32
+S_measure_struct(pTHX_ char *pat, register char *patend)
+{
+ I32 datumtype;
+ register I32 len;
+ register I32 total = 0;
+ int commas = 0;
+ int star; /* 1 if count is *, -1 if no count given, -2 for / */
+#ifdef PERL_NATINT_PACK
+ int natint; /* native integer */
+ int unatint; /* unsigned native integer */
+#endif
+ char buf[2];
+ register int size;
+
+ while ((pat = next_symbol(pat, patend)) < patend) {
+ datumtype = *pat++ & 0xFF;
+#ifdef PERL_NATINT_PACK
+ natint = 0;
+#endif
+ if (*pat == '!') {
+ static const char *natstr = "sSiIlL";
+
+ if (strchr(natstr, datumtype)) {
+#ifdef PERL_NATINT_PACK
+ natint = 1;
+#endif
+ pat++;
+ }
+ else
+ croak("'!' allowed only after types %s", natstr);
+ }
+ len = find_count(&pat, patend, &star);
+ if (star > 0) /* */
+ croak("%s not allowed in length fields", "count *");
+ else if (star < 0) /* No explicit len */
+ len = datumtype != '@';
+
+ switch(datumtype) {
+ default:
+ croak("Invalid type in unpack: '%c'", (int)datumtype);
+ case '@':
+ case '/':
+ case 'U': /* XXXX Is it correct? */
+ case 'w':
+ case 'u':
+ buf[0] = datumtype;
+ buf[1] = 0;
+ croak("%s not allowed in length fields", buf);
+ case ',': /* grandfather in commas but with a warning */
+ if (commas++ == 0 && ckWARN(WARN_UNPACK))
+ Perl_warner(aTHX_ WARN_UNPACK,
+ "Invalid type in unpack: '%c'", (int)datumtype);
+ /* FALL THROUGH */
+ case '%':
+ size = 0;
+ break;
+ case '(':
+ {
+ char *beg = pat, *end;
+
+ if (star >= 0)
+ croak("()-group starts with a count");
+ end = group_end(beg, patend, ')');
+ pat = end + 1;
+ len = find_count(&pat, patend, &star);
+ if (star < 0) /* No count */
+ len = 1;
+ else if (star > 0) /* Star */
+ croak("%s not allowed in length fields", "count *");
+ size = measure_struct(beg, end);
+ break;
+ }
+ case 'X':
+ size = -1;
+ if (total < len)
+ croak("X outside of string");
+ break;
+ case 'x':
+ case 'A':
+ case 'Z':
+ case 'a':
+ case 'c':
+ case 'C':
+ size = 1;
+ break;
+ case 'B':
+ case 'b':
+ len = (len + 7)/8;
+ size = 1;
+ break;
+ case 'H':
+ case 'h':
+ len = (len + 1)/2;
+ size = 1;
+ break;
+ case 's':
+#if SHORTSIZE == SIZE16
+ size = SIZE16;
+#else
+ size = (natint ? sizeof(short) : SIZE16);
+#endif
+ break;
+ case 'v':
+ case 'n':
+ case 'S':
+#if SHORTSIZE == SIZE16
+ size = SIZE16;
+#else
+ unatint = natint && datumtype == 'S';
+ size = (unatint ? sizeof(unsigned short) : SIZE16);
+#endif
+ break;
+ case 'i':
+ size = sizeof(int);
+ break;
+ case 'I':
+ size = sizeof(unsigned int);
+ break;
+ case 'l':
+#if LONGSIZE == SIZE32
+ size = SIZE32;
+#else
+ size = (natint ? sizeof(long) : SIZE32);
+#endif
+ break;
+ case 'V':
+ case 'N':
+ case 'L':
+#if LONGSIZE == SIZE32
+ size = SIZE32;
+#else
+ unatint = natint && datumtype == 'L';
+ size = (unatint ? sizeof(unsigned long) : SIZE32);
+#endif
+ break;
+ case 'P':
+ len = 1;
+ /* FALL THROUGH */
+ case 'p':
+ size = sizeof(char*);
+ break;
+#ifdef HAS_QUAD
+ case 'q':
+ size = sizeof(Quad_t);
+ break;
+ case 'Q':
+ size = sizeof(Uquad_t);
+ break;
+#endif
+ case 'f':
+ case 'F':
+ size = sizeof(float);
+ break;
+ case 'd':
+ case 'D':
+ size = sizeof(double);
+ break;
+ }
+ total += len * size;
+ }
+ return total;
+}
+
/* Returns -1 on no count or on star */
STATIC I32
S_find_count(pTHX_ char **ppat, register char *patend, int *star)
@@ -164,8 +331,15 @@ S_find_count(pTHX_ char **ppat, register char *patend, int *star)
else if (isDIGIT(*pat) || *pat == '[') {
bool brackets = *pat == '[';
- if (brackets)
+ if (brackets) {
++pat, len = 0;
+ if (!isDIGIT(*pat)) {
+ char *end = group_end(pat, patend, ']');
+
+ *ppat = end + 1;
+ return measure_struct(pat, end);
+ }
+ }
else
len = *pat++ - '0';
while (isDIGIT(*pat)) {
@@ -201,7 +375,6 @@ S_next_symbol(pTHX_ register char *pat, register char *patend)
return pat;
}
-
/*
=for apidoc unpack_str
@@ -253,8 +426,15 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *
#ifdef PERL_NATINT_PACK
natint = 0;
#endif
+ /* do first one only unless in list context
+ / is implemented by unpacking the count, then poping it from the
+ stack, so must check that we're not in the middle of a / */
+ if ( (flags & UNPACK_ONLY_ONE)
+ && (SP - PL_stack_base == start_sp_offset + 1)
+ && (datumtype != '/') )
+ break;
if (*pat == '!') {
- char *natstr = "sSiIlL";
+ static const char natstr[] = "sSiIlL";
if (strchr(natstr, datumtype)) {
#ifdef PERL_NATINT_PACK
@@ -269,7 +449,7 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *
if (star > 0)
len = strend - strbeg; /* long enough */
else if (star < 0) /* No explicit len */
- len = datumtype != '@';
+ len = datumtype != '@';
redo_switch:
switch(datumtype) {
@@ -1055,7 +1235,7 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *
}
break;
case 'Q':
- along = (strend - s) / sizeof(Quad_t);
+ along = (strend - s) / sizeof(Uquad_t);
if (len > along)
len = along;
if (checksum) {
@@ -1222,14 +1402,6 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *
XPUSHs(sv_2mortal(sv));
checksum = 0;
}
- if ((flags & UNPACK_ONLY_ONE)
- && SP - PL_stack_base == start_sp_offset + 1) {
- /* do first one only unless in list context
- / is implmented by unpacking the count, then poping it from the
- stack, so must check that we're not in the middle of a / */
- if ((pat >= patend) || *pat != '/')
- break;
- }
}
if (new_s)
*new_s = s;
@@ -1426,7 +1598,7 @@ Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beg
natint = 0;
#endif
if (*pat == '!') {
- char *natstr = "sSiIlL";
+ static const char natstr[] = "sSiIlL";
if (strchr(natstr, datumtype)) {
#ifdef PERL_NATINT_PACK
diff --git a/proto.h b/proto.h
index f3e894cdb3..3de4e0a54a 100644
--- a/proto.h
+++ b/proto.h
@@ -1109,6 +1109,7 @@ STATIC int S_div128(pTHX_ SV *pnum, bool *done);
STATIC char * S_next_symbol(pTHX_ char *pat, char *patend);
STATIC I32 S_find_count(pTHX_ char **ppat, char *patend, int *star);
STATIC char * S_group_end(pTHX_ char *pat, char *patend, char ender);
+STATIC I32 S_measure_struct(pTHX_ char *pat, char *patend);
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
diff --git a/t/op/pack.t b/t/op/pack.t
index c0f379b085..5984be5077 100755
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 1493;
+plan tests => 3943;
use strict;
use warnings;
@@ -749,3 +749,62 @@ foreach (
@a = unpack '(SL)3 SL', pack '(SL)*', 67..74;
is("@a", "@b");
}
+
+{ # Repeat count [SUBEXPR]
+ my @codes = qw( x A Z a c C B b H h s v n S i I l V N L p P f F d D
+ s! S! i! I! l! L! );
+ if (eval { pack 'q', 1 } ) {
+ push @codes, qw(q Q);
+ } else {
+ push @codes, qw(c C); # Keep the count the same
+ }
+
+ my %val;
+ @val{@codes} = map { / [Xx] (?{ undef })
+ | [AZa] (?{ 'something' })
+ | C (?{ 214 })
+ | c (?{ 114 })
+ | [Bb] (?{ '101' })
+ | [Hh] (?{ 'b8' })
+ | [svnSiIlVNLqQ] (?{ 10111 })
+ | [FfDd] (?{ 1.36514538e67 })
+ | [pP] (?{ "try this buffer" })
+ /x; $^R } @codes;
+ my @end = (0x12345678, 0x23456781, 0x35465768, 0x15263748);
+ my $end = "N4";
+
+ for my $type (@codes) {
+ my @list = $val{$type};
+ @list = () unless defined $list[0];
+ for my $count ('', '3', '[11]') {
+ my $c = 1;
+ $c = $1 if $count =~ /(\d+)/;
+ my @list1 = @list;
+ @list1 = (@list1) x $c unless $type =~ /[XxAaZBbHhP]/;
+ for my $groupend ('', ')2', ')[8]') {
+ my $groupbegin = ($groupend ? '(' : '');
+ $c = 1;
+ $c = $1 if $groupend =~ /(\d+)/;
+ my @list2 = (@list1) x $c;
+
+ my $junk1 = "$groupbegin $type$count $groupend";
+ # print "# junk1=$junk1\n";
+ my $p = pack $junk1, @list2;
+ my $half = int( (length $p)/2 );
+ for my $move ('', "X$half", 'x1', "x$half") {
+ my $junk = "$junk1 $move";
+ # print "# junk=$junk list=(@list2)\n";
+ $p = pack "$junk $end", @list2, @end;
+ my @l = unpack "x[$junk] $end", $p;
+ is(scalar @l, scalar @end);
+ is("@l", "@end", "skipping x[$junk]");
+ }
+ }
+ }
+ }
+}
+
+# / is recognized after spaces in scalar context
+# XXXX no spaces are allowed in pack... In pack only before the slash...
+is(scalar unpack('A /A Z20', pack 'A/A* Z20', 'bcde', 'xxxxx'), 'bcde');
+is(scalar unpack('A /A /A Z20', '3004bcde'), 'bcde');