summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>2002-02-21 01:06:14 -0500
committerJarkko Hietaniemi <jhi@iki.fi>2002-02-21 17:28:23 +0000
commit18529408d66077360bcde31227a7fb97dfc49222 (patch)
tree940b010257b7ddcef7fc18baec0bc75724bc3640
parent7a2cf369e16a2ef8ad45d686139073724ed14fe2 (diff)
downloadperl-18529408d66077360bcde31227a7fb97dfc49222.tar.gz
grouping in pack/unpack
Message-ID: <20020221060614.A29836@math.ohio-state.edu> p4raw-id: //depot/perl@14815
-rw-r--r--embed.fnc5
-rw-r--r--embed.h10
-rw-r--r--global.sym2
-rw-r--r--pod/perldiag.pod5
-rw-r--r--pod/perlfunc.pod15
-rw-r--r--pod/perltodo.pod4
-rw-r--r--pp_pack.c385
-rw-r--r--proto.h5
-rwxr-xr-xt/op/pack.t40
9 files changed, 337 insertions, 134 deletions
diff --git a/embed.fnc b/embed.fnc
index 1c6403fb7e..60cd3e08e4 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -572,6 +572,7 @@ Ap |void |set_numeric_local
Ap |void |set_numeric_radix
Ap |void |set_numeric_standard
Apd |void |require_pv |const char* pv
+Apd |void |pack_cat |SV *cat|char *pat|char *patend|SV **beglist|SV **endlist|SV ***next_in_list|U32 flags
p |void |pidgone |Pid_t pid|int status
Ap |void |pmflag |U16* pmfl|int ch
p |OP* |pmruntime |OP* pm|OP* expr|OP* repl
@@ -792,6 +793,7 @@ Ap |I32 |unlnk |char* f
#if defined(USE_5005THREADS)
Ap |void |unlock_condpair|void* svv
#endif
+Apd |I32 |unpack_str |char *pat|char *patend|char *s|char *strbeg|char *strend|char **new_s|I32 ocnt|U32 flags
Ap |void |unsharepvn |const char* sv|I32 len|U32 hash
p |void |unshare_hek |HEK* hek
p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg
@@ -1062,6 +1064,9 @@ s |void |doencodes |SV* sv|char* s|I32 len
s |SV* |mul128 |SV *sv|U8 m
s |SV* |is_an_int |char *s|STRLEN l
s |int |div128 |SV *pnum|bool *done
+s |char * |next_symbol |char *pat|char *patend
+s |I32 |find_count |char **ppat|char *patend|int *star
+s |char * |group_end |char *pat|char *patend|char ender
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
diff --git a/embed.h b/embed.h
index a84707b9f2..40bfb28944 100644
--- a/embed.h
+++ b/embed.h
@@ -552,6 +552,7 @@
#define set_numeric_radix Perl_set_numeric_radix
#define set_numeric_standard Perl_set_numeric_standard
#define require_pv Perl_require_pv
+#define pack_cat Perl_pack_cat
#define pidgone Perl_pidgone
#define pmflag Perl_pmflag
#define pmruntime Perl_pmruntime
@@ -749,6 +750,7 @@
#if defined(USE_5005THREADS)
#define unlock_condpair Perl_unlock_condpair
#endif
+#define unpack_str Perl_unpack_str
#define unsharepvn Perl_unsharepvn
#define unshare_hek Perl_unshare_hek
#define utilize Perl_utilize
@@ -995,6 +997,9 @@
#define mul128 S_mul128
#define is_an_int S_is_an_int
#define div128 S_div128
+#define next_symbol S_next_symbol
+#define find_count S_find_count
+#define group_end S_group_end
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
#define docatch S_docatch
@@ -2099,6 +2104,7 @@
#define set_numeric_radix() Perl_set_numeric_radix(aTHX)
#define set_numeric_standard() Perl_set_numeric_standard(aTHX)
#define require_pv(a) Perl_require_pv(aTHX_ a)
+#define pack_cat(a,b,c,d,e,f,g) Perl_pack_cat(aTHX_ a,b,c,d,e,f,g)
#define pidgone(a,b) Perl_pidgone(aTHX_ a,b)
#define pmflag(a,b) Perl_pmflag(aTHX_ a,b)
#define pmruntime(a,b,c) Perl_pmruntime(aTHX_ a,b,c)
@@ -2294,6 +2300,7 @@
#if defined(USE_5005THREADS)
#define unlock_condpair(a) Perl_unlock_condpair(aTHX_ a)
#endif
+#define unpack_str(a,b,c,d,e,f,g,h) Perl_unpack_str(aTHX_ a,b,c,d,e,f,g,h)
#define unsharepvn(a,b,c) Perl_unsharepvn(aTHX_ a,b,c)
#define unshare_hek(a) Perl_unshare_hek(aTHX_ a)
#define utilize(a,b,c,d,e) Perl_utilize(aTHX_ a,b,c,d,e)
@@ -2534,6 +2541,9 @@
#define mul128(a,b) S_mul128(aTHX_ a,b)
#define is_an_int(a,b) S_is_an_int(aTHX_ a,b)
#define div128(a,b) S_div128(aTHX_ a,b)
+#define next_symbol(a,b) S_next_symbol(aTHX_ a,b)
+#define find_count(a,b,c) S_find_count(aTHX_ a,b,c)
+#define group_end(a,b,c) S_group_end(aTHX_ a,b,c)
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
#define docatch(a) S_docatch(aTHX_ a)
diff --git a/global.sym b/global.sym
index 757e1bde71..624f356da3 100644
--- a/global.sym
+++ b/global.sym
@@ -341,6 +341,7 @@ Perl_set_numeric_local
Perl_set_numeric_radix
Perl_set_numeric_standard
Perl_require_pv
+Perl_pack_cat
Perl_pmflag
Perl_pop_scope
Perl_push_scope
@@ -496,6 +497,7 @@ Perl_to_utf8_title
Perl_to_utf8_fold
Perl_unlnk
Perl_unlock_condpair
+Perl_unpack_str
Perl_unsharepvn
Perl_utf16_to_utf8
Perl_utf16_to_utf8_reversed
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 5be9ced695..0c87d94643 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1559,6 +1559,11 @@ version of Perl, and this should not happen anyway.
(F) Unlike with "next" or "last", you're not allowed to goto an
unspecified destination. See L<perlfunc/goto>.
+=item %s-group starts with a count
+
+(F) In pack/unpack a ()-group started with a count. A count is
+supposed to follow something: a template character or a ()-group.
+
=item %s had compilation errors
(F) The final summary message when a C<perl -c> fails.
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index ea196c27c0..56ad58f474 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -3104,9 +3104,8 @@ the converted values. Typically, each converted value looks
like its machine-level representation. For example, on 32-bit machines
a converted integer may be represented by a sequence of 4 bytes.
-The TEMPLATE is a
-sequence of characters that give the order and type of values, as
-follows:
+The TEMPLATE is a sequence of characters that give the order and type
+of values, as follows:
a A string with arbitrary binary data, will be null padded.
A A text (ASCII) string, will be space padded.
@@ -3170,6 +3169,7 @@ follows:
x A null byte.
X Back up a byte.
@ Null fill to absolute position.
+ ( Beginning of a ()-group.
The following rules apply:
@@ -3183,7 +3183,8 @@ C<H>, and C<P> the pack function will gobble up that many values from
the LIST. A C<*> for the repeat count means to use however many items are
left, except for C<@>, C<x>, C<X>, where it is equivalent
to C<0>, and C<u>, where it is equivalent to 1 (or 45, what is the
-same).
+same). A numeric repeat count may optionally be enclosed in brackets, as in
+C<pack 'C[80]', @arr>.
When used with C<Z>, C<*> results in the addition of a trailing null
byte (so the packed result will be one longer than the byte C<length>
@@ -3400,6 +3401,12 @@ sequences of bytes.
=item *
+A ()-group is a sub-TEMPLATE enclosed in parentheses. A group may
+take a repeat count, both as postfix, and via the C</> template
+character.
+
+=item *
+
A comment in a TEMPLATE starts with C<#> and goes to the end of line.
=item *
diff --git a/pod/perltodo.pod b/pod/perltodo.pod
index 1b64991faf..8606f076b3 100644
--- a/pod/perltodo.pod
+++ b/pod/perltodo.pod
@@ -307,10 +307,6 @@ would rely on even more sed hackery in F<perly.fixer>.
j, J, g, G?
-=head2 pack "(stuff)*"
-
-That's to say, C<pack "(sI)40"> would be the same as C<pack "sI"x40>
-
=head2 bitfields in pack
=head2 Cross compilation
diff --git a/pp_pack.c b/pp_pack.c
index 52d86d18bb..6160e64c94 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -123,32 +123,102 @@ S_mul128(pTHX_ SV *sv, U8 m)
#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
#endif
+#define UNPACK_ONLY_ONE 0x1
+#define UNPACK_DO_UTF8 0x2
-PP(pp_unpack)
+STATIC char *
+S_group_end(pTHX_ register char *pat, register char *patend, char ender)
+{
+ while (pat < patend) {
+ char c = *pat++;
+
+ if (isSPACE(c))
+ continue;
+ else if (c == ender)
+ return --pat;
+ else if (c == '#') {
+ while (pat < patend && *pat != '\n')
+ pat++;
+ continue;
+ } else if (c == '(')
+ pat = group_end(pat, patend, ')') + 1;
+ }
+ croak("No group ending character `%c' found", ender);
+}
+
+/* Returns -1 on no count or on star */
+STATIC I32
+S_find_count(pTHX_ char **ppat, register char *patend, int *star)
+{
+ register char *pat = *ppat;
+ I32 len;
+
+ *star = 0;
+ if (pat >= patend)
+ len = 1;
+ else if (*pat == '*') {
+ pat++;
+ *star = 1;
+ len = -1;
+ }
+ else if (isDIGIT(*pat) || *pat == '[') {
+ bool brackets = *pat == '[';
+
+ if (brackets)
+ ++pat, len = 0;
+ else
+ len = *pat++ - '0';
+ while (isDIGIT(*pat)) {
+ len = (len * 10) + (*pat++ - '0');
+ if (len < 0)
+ croak("Repeat count in unpack overflows");
+ }
+ if (brackets && *pat++ != ']')
+ croak("No repeat count ender ] found after digits");
+ }
+ else
+ len = *star = -1;
+ *ppat = pat;
+ return len;
+}
+
+STATIC char *
+S_next_symbol(pTHX_ register char *pat, register char *patend)
+{
+ while (pat < patend) {
+ if (isSPACE(*pat))
+ pat++;
+ else if (*pat == '#') {
+ pat++;
+ while (pat < patend && *pat != '\n')
+ pat++;
+ if (pat < patend)
+ pat++;
+ }
+ else
+ return pat;
+ }
+ return pat;
+}
+
+
+/*
+=for apidoc unpack_str
+
+The engine implementing unpack() Perl function.
+
+=cut */
+
+I32
+Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
{
dSP;
- dPOPPOPssrl;
- I32 start_sp_offset = SP - PL_stack_base;
- I32 gimme = GIMME_V;
- SV *sv;
- STRLEN llen;
- STRLEN rlen;
- register char *pat = SvPV(left, llen);
-#ifdef PACKED_IS_OCTETS
- /* Packed side is assumed to be octets - so force downgrade if it
- has been UTF-8 encoded by accident
- */
- register char *s = SvPVbyte(right, rlen);
-#else
- register char *s = SvPV(right, rlen);
-#endif
- char *strend = s + rlen;
- char *strbeg = s;
- register char *patend = pat + llen;
I32 datumtype;
register I32 len;
register I32 bits = 0;
register char *str;
+ SV *sv;
+ I32 start_sp_offset = SP - PL_stack_base;
/* These must not be in registers: */
short ashort;
@@ -171,26 +241,18 @@ PP(pp_unpack)
NV cdouble = 0.0;
const int bits_in_uv = 8 * sizeof(culong);
int commas = 0;
- int star;
+ int star; /* 1 if count is *, -1 if no count given, -2 for / */
#ifdef PERL_NATINT_PACK
int natint; /* native integer */
int unatint; /* unsigned native integer */
#endif
- bool do_utf8 = DO_UTF8(right);
+ bool do_utf8 = flags & UNPACK_DO_UTF8;
- while (pat < patend) {
- reparse:
+ while ((pat = next_symbol(pat, patend)) < patend) {
datumtype = *pat++ & 0xFF;
#ifdef PERL_NATINT_PACK
natint = 0;
#endif
- if (isSPACE(datumtype))
- continue;
- if (datumtype == '#') {
- while (pat < patend && *pat != '\n')
- pat++;
- continue;
- }
if (*pat == '!') {
char *natstr = "sSiIlL";
@@ -201,69 +263,83 @@ PP(pp_unpack)
pat++;
}
else
- DIE(aTHX_ "'!' allowed only after types %s", natstr);
- }
- star = 0;
- if (pat >= patend)
- len = 1;
- else if (*pat == '*') {
- len = strend - strbeg; /* long enough */
- pat++;
- star = 1;
- }
- else if (isDIGIT(*pat)) {
- len = *pat++ - '0';
- while (isDIGIT(*pat)) {
- len = (len * 10) + (*pat++ - '0');
- if (len < 0)
- DIE(aTHX_ "Repeat count in unpack overflows");
- }
+ croak("'!' allowed only after types %s", natstr);
}
- else
- len = (datumtype != '@');
+ len = find_count(&pat, patend, &star);
+ if (star > 0)
+ len = strend - strbeg; /* long enough */
+ else if (star < 0) /* No explicit len */
+ len = datumtype != '@';
+
redo_switch:
switch(datumtype) {
default:
- DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype);
+ croak("Invalid type in unpack: '%c'", (int)datumtype);
case ',': /* grandfather in commas but with a warning */
if (commas++ == 0 && ckWARN(WARN_UNPACK))
Perl_warner(aTHX_ WARN_UNPACK,
"Invalid type in unpack: '%c'", (int)datumtype);
break;
case '%':
- if (len == 1 && pat[-1] != '1')
- len = 16;
+ if (len == 1 && pat[-1] != '1' && pat[-1] != ']')
+ len = 16; /* len is not specified */
checksum = len;
culong = 0;
cdouble = 0;
- if (pat < patend)
- goto reparse;
+ continue;
break;
+ case '(':
+ {
+ char *beg = pat;
+ char *ss = s; /* Move from register */
+
+ if (star >= 0)
+ croak("()-group starts with a count");
+ aptr = group_end(beg, patend, ')');
+ pat = aptr + 1;
+ if (star != -2) {
+ len = find_count(&pat, patend, &star);
+ if (star < 0) /* No count */
+ len = 1;
+ else if (star > 0) /* Star */
+ len = strend - strbeg; /* long enough? */
+ }
+ PUTBACK;
+ while (len--) {
+ unpack_str(beg, aptr, ss, strbeg, strend, &ss,
+ ocnt + SP - PL_stack_base - start_sp_offset, flags);
+ if (star > 0 && ss == strend)
+ break; /* No way to continue */
+ }
+ SPAGAIN;
+ s = ss;
+ break;
+ }
case '@':
if (len > strend - strbeg)
- DIE(aTHX_ "@ outside of string");
+ croak("@ outside of string");
s = strbeg + len;
break;
case 'X':
if (len > s - strbeg)
- DIE(aTHX_ "X outside of string");
+ croak("X outside of string");
s -= len;
break;
case 'x':
if (len > strend - s)
- DIE(aTHX_ "x outside of string");
+ croak("x outside of string");
s += len;
break;
case '/':
- if (start_sp_offset >= SP - PL_stack_base)
- DIE(aTHX_ "/ must follow a numeric type");
+ if (ocnt + SP - PL_stack_base - start_sp_offset <= 0)
+ croak("/ must follow a numeric type");
datumtype = *pat++;
if (*pat == '*')
pat++; /* ignore '*' for compatibility with pack */
if (isDIGIT(*pat))
- DIE(aTHX_ "/ cannot take a count" );
+ croak("/ cannot take a count" );
len = POPi;
- star = 0;
+ star = -2;
goto redo_switch;
case 'A':
case 'Z':
@@ -280,7 +356,7 @@ PP(pp_unpack)
s = SvPVX(sv);
while (*s)
s++;
- if (star) /* exact for 'Z*' */
+ if (star > 0) /* exact for 'Z*' */
len = s - SvPVX(sv) + 1;
}
else { /* 'A' strips both nulls and spaces */
@@ -297,7 +373,7 @@ PP(pp_unpack)
break;
case 'B':
case 'b':
- if (star || len > (strend - s) * 8)
+ if (star > 0 || len > (strend - s) * 8)
len = (strend - s) * 8;
if (checksum) {
if (!PL_bitcount) {
@@ -363,7 +439,7 @@ PP(pp_unpack)
break;
case 'H':
case 'h':
- if (star || len > (strend - s) * 2)
+ if (star > 0 || len > (strend - s) * 2)
len = (strend - s) * 2;
sv = NEWSV(35, len + 1);
SvCUR_set(sv, len);
@@ -926,12 +1002,12 @@ PP(pp_unpack)
}
}
if ((s >= strend) && bytes)
- DIE(aTHX_ "Unterminated compressed integer");
+ croak("Unterminated compressed integer");
}
break;
case 'P':
- if (star)
- DIE(aTHX_ "P must have an explicit size");
+ if (star > 0)
+ croak("P must have an explicit size");
EXTEND(SP, 1);
if (sizeof(char*) > strend - s)
break;
@@ -1146,17 +1222,48 @@ PP(pp_unpack)
XPUSHs(sv_2mortal(sv));
checksum = 0;
}
- if (gimme != G_ARRAY &&
- SP - PL_stack_base == start_sp_offset + 1) {
- /* do first one only unless in list context
+ if ((flags & UNPACK_ONLY_ONE)
+ && SP - PL_stack_base == start_sp_offset + 1) {
+ /* do first one only unless in list context
/ is implmented by unpacking the count, then poping it from the
stack, so must check that we're not in the middle of a / */
if ((pat >= patend) || *pat != '/')
- RETURN;
+ break;
}
}
- if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR)
- PUSHs(&PL_sv_undef);
+ if (new_s)
+ *new_s = s;
+ PUTBACK;
+ return SP - PL_stack_base - start_sp_offset;
+}
+
+PP(pp_unpack)
+{
+ dSP;
+ dPOPPOPssrl;
+ I32 gimme = GIMME_V;
+ STRLEN llen;
+ STRLEN rlen;
+ register char *pat = SvPV(left, llen);
+#ifdef PACKED_IS_OCTETS
+ /* Packed side is assumed to be octets - so force downgrade if it
+ has been UTF-8 encoded by accident
+ */
+ register char *s = SvPVbyte(right, rlen);
+#else
+ register char *s = SvPV(right, rlen);
+#endif
+ char *strend = s + rlen;
+ register char *patend = pat + llen;
+ register I32 cnt;
+
+ PUTBACK;
+ cnt = unpack_str(pat, patend, s, s, strend, NULL, 0,
+ ((gimme == G_SCALAR) ? UNPACK_ONLY_ONE : 0)
+ | (DO_UTF8(right) ? UNPACK_DO_UTF8 : 0));
+ SPAGAIN;
+ if ( !cnt && gimme == G_SCALAR )
+ PUSHs(&PL_sv_undef);
RETURN;
}
@@ -1264,22 +1371,27 @@ S_div128(pTHX_ SV *pnum, bool *done)
return (m);
}
+#define PACK_CHILD 0x1
-PP(pp_pack)
+/*
+=for apidoc pack_cat
+
+The engine implementing pack() Perl function.
+
+=cut */
+
+void
+Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
{
- dSP; dMARK; dORIGMARK; dTARGET;
- register SV *cat = TARG;
register I32 items;
STRLEN fromlen;
- register char *pat = SvPVx(*++MARK, fromlen);
- char *patcopy;
- register char *patend = pat + fromlen;
register I32 len;
I32 datumtype;
SV *fromstr;
/*SUPPRESS 442*/
static char null10[] = {0,0,0,0,0,0,0,0,0,0};
static char *space10 = " ";
+ int star;
/* These must not be in registers: */
char achar;
@@ -1300,30 +1412,19 @@ PP(pp_pack)
int natint; /* native integer */
#endif
- items = SP - MARK;
- MARK++;
- sv_setpvn(cat, "", 0);
- patcopy = pat;
- while (pat < patend) {
+ items = endlist - beglist;
+#ifndef PACKED_IS_OCTETS
+ pat = next_symbol(pat, patend);
+ if (pat < patend && *pat == 'U' && !flags)
+ SvUTF8_on(cat);
+#endif
+ while ((pat = next_symbol(pat, patend)) < patend) {
SV *lengthcode = Nullsv;
-#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no)
+#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
datumtype = *pat++ & 0xFF;
#ifdef PERL_NATINT_PACK
natint = 0;
#endif
- if (isSPACE(datumtype)) {
- patcopy++;
- continue;
- }
-#ifndef PACKED_IS_OCTETS
- if (datumtype == 'U' && pat == patcopy+1)
- SvUTF8_on(cat);
-#endif
- if (datumtype == '#') {
- while (pat < patend && *pat != '\n')
- pat++;
- continue;
- }
if (*pat == '!') {
char *natstr = "sSiIlL";
@@ -1334,40 +1435,31 @@ PP(pp_pack)
pat++;
}
else
- DIE(aTHX_ "'!' allowed only after types %s", natstr);
+ croak("'!' allowed only after types %s", natstr);
}
- if (*pat == '*') {
+ len = find_count(&pat, patend, &star);
+ if (star > 0) /* Count is '*' */
len = strchr("@Xxu", datumtype) ? 0 : items;
- pat++;
- }
- else if (isDIGIT(*pat)) {
- len = *pat++ - '0';
- while (isDIGIT(*pat)) {
- len = (len * 10) + (*pat++ - '0');
- if (len < 0)
- DIE(aTHX_ "Repeat count in pack overflows");
- }
- }
- else
+ else if (star < 0) /* Default len */
len = 1;
- if (*pat == '/') {
+ if (*pat == '/') { /* doing lookahead how... */
++pat;
if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*')
- DIE(aTHX_ "/ must be followed by a*, A* or Z*");
+ croak("/ must be followed by a*, A* or Z*");
lengthcode = sv_2mortal(newSViv(sv_len(items > 0
- ? *MARK : &PL_sv_no)
+ ? *beglist : &PL_sv_no)
+ (*pat == 'Z' ? 1 : 0)));
}
switch(datumtype) {
default:
- DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype);
+ croak("Invalid type in pack: '%c'", (int)datumtype);
case ',': /* grandfather in commas but with a warning */
if (commas++ == 0 && ckWARN(WARN_PACK))
Perl_warner(aTHX_ WARN_PACK,
"Invalid type in pack: '%c'", (int)datumtype);
break;
case '%':
- DIE(aTHX_ "%% may only be used in unpack");
+ croak("%% may only be used in unpack");
case '@':
len -= SvCUR(cat);
if (len > 0)
@@ -1376,10 +1468,35 @@ PP(pp_pack)
if (len > 0)
goto shrink;
break;
+ case '(':
+ {
+ char *beg = pat;
+ SV **savebeglist = beglist; /* beglist de-register-ed */
+
+ if (star >= 0)
+ croak("()-group starts with a count");
+ aptr = group_end(beg, patend, ')');
+ pat = aptr + 1;
+ if (star != -2) {
+ len = find_count(&pat, patend, &star);
+ if (star < 0) /* No count */
+ len = 1;
+ else if (star > 0) /* Star */
+ len = items; /* long enough? */
+ }
+ while (len--) {
+ pack_cat(cat, beg, aptr, savebeglist, endlist,
+ &savebeglist, PACK_CHILD);
+ if (star > 0 && savebeglist == endlist)
+ break; /* No way to continue */
+ }
+ beglist = savebeglist;
+ break;
+ }
case 'X':
shrink:
if (SvCUR(cat) < len)
- DIE(aTHX_ "X outside of string");
+ croak("X outside of string");
SvCUR(cat) -= len;
*SvEND(cat) = '\0';
break;
@@ -1396,7 +1513,7 @@ PP(pp_pack)
case 'a':
fromstr = NEXTFROM;
aptr = SvPV(fromstr, fromlen);
- if (pat[lengthcode ? -2 : -1] == '*') { /* -2 after '/' */
+ if (star > 0) { /* -2 after '/' */
len = fromlen;
if (datumtype == 'Z')
++len;
@@ -1434,7 +1551,7 @@ PP(pp_pack)
fromstr = NEXTFROM;
saveitems = items;
str = SvPV(fromstr, fromlen);
- if (pat[-1] == '*')
+ if (star > 0)
len = fromlen;
aint = SvCUR(cat);
SvCUR(cat) += (len+7)/8;
@@ -1490,7 +1607,7 @@ PP(pp_pack)
fromstr = NEXTFROM;
saveitems = items;
str = SvPV(fromstr, fromlen);
- if (pat[-1] == '*')
+ if (star > 0)
len = fromlen;
aint = SvCUR(cat);
SvCUR(cat) += (len+1)/2;
@@ -1668,7 +1785,7 @@ PP(pp_pack)
adouble = Perl_floor(SvNV(fromstr));
if (adouble < 0)
- DIE(aTHX_ "Cannot compress negative numbers");
+ croak("Cannot compress negative numbers");
if (
#if UVSIZE > 4 && UVSIZE >= NVSIZE
@@ -1702,7 +1819,7 @@ PP(pp_pack)
/* Copy string and check for compliance */
from = SvPV(fromstr, len);
if ((norm = is_an_int(from, len)) == NULL)
- DIE(aTHX_ "can compress only unsigned integer");
+ croak("can compress only unsigned integer");
New('w', result, len, char);
in = result + len;
@@ -1722,7 +1839,7 @@ PP(pp_pack)
double next = floor(adouble / 128);
*--in = (unsigned char)(adouble - (next * 128)) | 0x80;
if (in <= buf) /* this cannot happen ;-) */
- DIE(aTHX_ "Cannot compress integer");
+ croak("Cannot compress integer");
adouble = next;
} while (adouble > 0);
buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
@@ -1737,7 +1854,7 @@ PP(pp_pack)
/* Copy string and check for compliance */
from = SvPV(fromstr, len);
if ((norm = is_an_int(from, len)) == NULL)
- DIE(aTHX_ "can compress only unsigned integer");
+ croak("can compress only unsigned integer");
New('w', result, len, char);
in = result + len;
@@ -1888,10 +2005,28 @@ PP(pp_pack)
break;
}
}
+ if (next_in_list)
+ *next_in_list = beglist;
+}
+#undef NEXTFROM
+
+
+PP(pp_pack)
+{
+ dSP; dMARK; dORIGMARK; dTARGET;
+ register SV *cat = TARG;
+ STRLEN fromlen;
+ register char *pat = SvPVx(*++MARK, fromlen);
+ register char *patend = pat + fromlen;
+
+ MARK++;
+ sv_setpvn(cat, "", 0);
+
+ pack_cat(cat, pat, patend, MARK, SP + 1, NULL, 0);
+
SvSETMAGIC(cat);
SP = ORIGMARK;
PUSHs(cat);
RETURN;
}
-#undef NEXTFROM
diff --git a/proto.h b/proto.h
index 963c70c8d2..f3e894cdb3 100644
--- a/proto.h
+++ b/proto.h
@@ -614,6 +614,7 @@ PERL_CALLCONV void Perl_set_numeric_local(pTHX);
PERL_CALLCONV void Perl_set_numeric_radix(pTHX);
PERL_CALLCONV void Perl_set_numeric_standard(pTHX);
PERL_CALLCONV void Perl_require_pv(pTHX_ const char* pv);
+PERL_CALLCONV void Perl_pack_cat(pTHX_ SV *cat, char *pat, char *patend, SV **beglist, SV **endlist, SV ***next_in_list, U32 flags);
PERL_CALLCONV void Perl_pidgone(pTHX_ Pid_t pid, int status);
PERL_CALLCONV void Perl_pmflag(pTHX_ U16* pmfl, int ch);
PERL_CALLCONV OP* Perl_pmruntime(pTHX_ OP* pm, OP* expr, OP* repl);
@@ -824,6 +825,7 @@ PERL_CALLCONV I32 Perl_unlnk(pTHX_ char* f);
#if defined(USE_5005THREADS)
PERL_CALLCONV void Perl_unlock_condpair(pTHX_ void* svv);
#endif
+PERL_CALLCONV I32 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags);
PERL_CALLCONV void Perl_unsharepvn(pTHX_ const char* sv, I32 len, U32 hash);
PERL_CALLCONV void Perl_unshare_hek(pTHX_ HEK* hek);
PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg);
@@ -1104,6 +1106,9 @@ STATIC void S_doencodes(pTHX_ SV* sv, char* s, I32 len);
STATIC SV* S_mul128(pTHX_ SV *sv, U8 m);
STATIC SV* S_is_an_int(pTHX_ char *s, STRLEN l);
STATIC int S_div128(pTHX_ SV *pnum, bool *done);
+STATIC char * S_next_symbol(pTHX_ char *pat, char *patend);
+STATIC I32 S_find_count(pTHX_ char **ppat, char *patend, int *star);
+STATIC char * S_group_end(pTHX_ char *pat, char *patend, char ender);
#endif
#if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT)
diff --git a/t/op/pack.t b/t/op/pack.t
index 6bbd737d0a..c0f379b085 100755
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 1477;
+plan tests => 1493;
use strict;
use warnings;
@@ -711,3 +711,41 @@ foreach (
eval { my $t=unpack("P*", "abc") };
like($@, qr/P must have an explicit size/);
}
+
+{ # Grouping constructs
+ my (@a, @b);
+ @a = unpack '(SL)', pack 'SLSLSL', 67..90;
+ is("@a", "67 68");
+ @a = unpack '(SL)3', pack 'SLSLSL', 67..90;
+ @b = (67..72);
+ is("@a", "@b");
+ @a = unpack '(SL)3', pack 'SLSLSLSL', 67..90;
+ is("@a", "@b");
+ @a = unpack '(SL)[3]', pack 'SLSLSLSL', 67..90;
+ is("@a", "@b");
+ @a = unpack '(SL)[2] SL', pack 'SLSLSLSL', 67..90;
+ is("@a", "@b");
+ @a = unpack 'A/(SL)', pack 'ASLSLSLSL', 3, 67..90;
+ is("@a", "@b");
+ @a = unpack 'A/(SL)SL', pack 'ASLSLSLSL', 2, 67..90;
+ is("@a", "@b");
+ @a = unpack '(SL)*', pack 'SLSLSLSL', 67..90;
+ @b = (67..74);
+ is("@a", "@b");
+ @a = unpack '(SL)*SL', pack 'SLSLSLSL', 67..90;
+ is("@a", "@b");
+ eval { @a = unpack '(*SL)', '' };
+ like($@, qr/\(\)-group starts with a count/);
+ eval { @a = unpack '(3SL)', '' };
+ like($@, qr/\(\)-group starts with a count/);
+ eval { @a = unpack '([3]SL)', '' };
+ like($@, qr/\(\)-group starts with a count/);
+ eval { @a = pack '(*SL)' };
+ like($@, qr/\(\)-group starts with a count/);
+ @a = unpack '(SL)3 SL', pack '(SL)4', 67..74;
+ is("@a", "@b");
+ @a = unpack '(SL)3 SL', pack '(SL)[4]', 67..74;
+ is("@a", "@b");
+ @a = unpack '(SL)3 SL', pack '(SL)*', 67..74;
+ is("@a", "@b");
+}