summaryrefslogtreecommitdiff
path: root/pp_pack.c
diff options
context:
space:
mode:
authorTon Hospel <perl5-porters@ton.iguana.be>2005-03-27 18:32:11 +0000
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2005-03-30 13:16:52 +0000
commit28be1210e1847088dea44932568ceeb145a4a140 (patch)
tree0ccb78fd71d51fa15cb0883d19178e60c7f54959 /pp_pack.c
parent47660177f659a8fbe5e2bac72a2bdfad9744a453 (diff)
downloadperl-28be1210e1847088dea44932568ceeb145a4a140.tar.gz
Re: PATCH: byte count feature request for unpack
Message-Id: <d26u7b$i3v$1@post.home.lunix> (rework of a patch from Arne Ahrend <aahrend@web.de>) p4raw-id: //depot/perl@24100
Diffstat (limited to 'pp_pack.c')
-rw-r--r--pp_pack.c141
1 files changed, 109 insertions, 32 deletions
diff --git a/pp_pack.c b/pp_pack.c
index 98f1bed837..dcebd5bdab 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -183,9 +183,9 @@ S_mul128(pTHX_ SV *sv, U8 m)
#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
#ifdef PERL_PACK_CAN_SHRIEKSIGN
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
#else
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
#endif
#ifndef PERL_PACK_CAN_BYTEORDER
@@ -761,13 +761,18 @@ S_measure_struct(pTHX_ tempsym_t* symptr)
Perl_croak(aTHX_ "Invalid type '%c' in %s",
(int)TYPE_NO_MODIFIERS(symptr->code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
+ case '.':
case '/':
case 'U': /* XXXX Is it correct? */
case 'w':
case 'u':
Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
- (int)symptr->code,
+ (int) TYPE_NO_MODIFIERS(symptr->code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
case '%':
size = 0;
@@ -1177,11 +1182,11 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
UV cuv = 0;
NV cdouble = 0.0;
const int bits_in_uv = CHAR_BIT * sizeof(cuv);
- char* strrelbeg = s;
bool beyond = FALSE;
bool explicit_length;
bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+ symptr->strbeg = s - strbeg;
while (next_symbol(symptr)) {
packprops_t props;
@@ -1242,6 +1247,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
symptr->flags |= group_modifiers;
symptr->patend = savsym.grpend;
+ symptr->previous = &savsym;
symptr->level++;
PUTBACK;
while (len--) {
@@ -1253,14 +1259,46 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
break; /* No way to continue */
}
SPAGAIN;
- symptr->flags &= ~group_modifiers;
- savsym.flags = symptr->flags;
+ savsym.flags = symptr->flags & ~group_modifiers;
*symptr = savsym;
break;
}
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+#endif
+ case '.': {
+ char *from;
+ SV *sv;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ bool u8 = utf8;
+#endif
+ if (howlen == e_star) from = strbeg;
+ else if (len <= 0) from = s;
+ else {
+ tempsym_t *group = symptr;
+
+ while (--len && group) group = group->previous;
+ from = group ? strbeg + group->strbeg : strbeg;
+ }
+ sv = from <= s ?
+ newSVuv( u8 ? (UV) utf8_length(from, s) : (UV) (s-from)) :
+ newSViv(-(u8 ? (IV) utf8_length(s, from) : (IV) (from-s)));
+ XPUSHs(sv_2mortal(sv));
+ break;
+ }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
- if (utf8) {
- s = strrelbeg;
+ s = strbeg + symptr->strbeg;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ if (utf8)
+#endif
+ {
while (len > 0) {
if (s >= strend)
Perl_croak(aTHX_ "'@' outside of string in unpack");
@@ -1270,9 +1308,9 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
if (s > strend)
Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
} else {
- if (len > strend - strrelbeg)
+ if (strend-s < len)
Perl_croak(aTHX_ "'@' outside of string in unpack");
- s = strrelbeg + len;
+ s += len;
}
break;
case 'X' | TYPE_IS_SHRIEKING:
@@ -1379,7 +1417,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
!is_utf8_space((U8 *) ptr)) break;
if (ptr >= s) ptr += UTF8SKIP(ptr);
else ptr++;
- if (ptr > s+len)
+ if (ptr > s+len)
Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
} else {
for (ptr = s+len-1; ptr >= s; ptr--)
@@ -2513,30 +2551,65 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
(int) TYPE_NO_MODIFIERS(datumtype));
case '%':
Perl_croak(aTHX_ "'%%' may not be used in pack");
+ {
+ char *from;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+#endif
+ case '.':
+ if (howlen == e_star) from = start;
+ else if (len == 0) from = cur;
+ else {
+ tempsym_t *group = symptr;
+
+ while (--len && group) group = group->previous;
+ from = group ? start + group->strbeg : start;
+ }
+ fromstr = NEXTFROM;
+ len = SvIV(fromstr);
+ goto resize;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
- if (utf8) {
- char *s = start + symptr->strbeg;
- while (len > 0 && s < cur) {
- s += UTF8SKIP(s);
- len--;
+ from = start + symptr->strbeg;
+ resize:
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ if (utf8)
+#endif
+ if (len >= 0) {
+ while (len && from < cur) {
+ from += UTF8SKIP(from);
+ len--;
+ }
+ if (from > cur)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ if (len) {
+ /* Here we know from == cur */
+ grow:
+ GROWING(0, cat, start, cur, len);
+ Zero(cur, len, char);
+ cur += len;
+ } else if (from < cur) {
+ len = cur - from;
+ goto shrink;
+ } else goto no_change;
+ } else {
+ cur = from;
+ len = -len;
+ goto utf8_shrink;
}
- if (s > cur)
- Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
- if (len > 0) {
- grow:
- GROWING(0, cat, start, cur, len);
- Zero(cur, len, char);
- cur += len;
- } else if (s < cur) cur = s;
- else goto no_change;
- } else {
- len -= cur - (start+symptr->strbeg);
+ else {
+ len -= cur - from;
if (len > 0) goto grow;
+ if (len == 0) goto no_change;
len = -len;
- if (len > 0) goto shrink;
- else goto no_change;
+ goto shrink;
}
break;
+ }
case '(': {
tempsym_t savsym = *symptr;
U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
@@ -2585,19 +2658,23 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
case 'X':
if (utf8) {
if (len < 1) goto no_change;
+ utf8_shrink:
while (len > 0) {
if (cur <= start)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
while (--cur, UTF8_IS_CONTINUATION(*cur)) {
if (cur <= start)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
}
len--;
}
} else {
shrink:
if (cur - start < len)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
cur -= len;
}
if (cur < start+symptr->strbeg) {