summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc5
-rw-r--r--ext/XS-APItest/t/delimcpy.t5
-rw-r--r--proto.h4
-rw-r--r--util.c207
4 files changed, 175 insertions, 46 deletions
diff --git a/embed.fnc b/embed.fnc
index be253fe72f..e088cd844c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -753,8 +753,9 @@ Ap |I32 |debop |NN const OP* o
Ap |I32 |debstack
Ap |I32 |debstackptrs
pR |SV * |defelem_target |NN SV *sv|NULLOK MAGIC *mg
-ATp |char* |delimcpy |NN char* to|NN const char* toend|NN const char* from \
- |NN const char* fromend|int delim|NN I32* retlen
+ATpd |char* |delimcpy|NN char* to|NN const char* to_end \
+ |NN const char* from|NN const char* from_end \
+ |const int delim|NN I32* retlen
Tpd |char* |delimcpy_no_escape|NN char* to|NN const char* toend \
|NN const char* from \
|NN const char* fromend|int delim \
diff --git a/ext/XS-APItest/t/delimcpy.t b/ext/XS-APItest/t/delimcpy.t
index e72c9903b4..9d2c7d133e 100644
--- a/ext/XS-APItest/t/delimcpy.t
+++ b/ext/XS-APItest/t/delimcpy.t
@@ -132,16 +132,17 @@ foreach my $d ("x", "\0") { # Try both printable and NUL delimiters
} while ($trunc_dest_len > 0);
}
-TODO: {
+{
# Repeat a few of the tests with a backslash delimiter, which means there
# is no possibiliby of an escape
- local $TODO = 'current code is broken when the delimiter is a backslash';
my $d = "\\";
my $source = $ib;
my $source_len = 1;
my $should_be = $source;
+ pass 'delimiter is a backslash for the rest of the tests';
+
$ret = test_delimcpy($source, $source_len, $d, $source_len, $source_len, $poison);
is($ret->[0], expected($source, $source_len, $poison, $source_len),
"delimcpy works when there is no delimiter at all");
diff --git a/proto.h b/proto.h
index 8a211e84ab..4abb9769b2 100644
--- a/proto.h
+++ b/proto.h
@@ -837,9 +837,9 @@ PERL_CALLCONV SV * Perl_defelem_target(pTHX_ SV *sv, MAGIC *mg)
PERL_CALLCONV void Perl_delete_eval_scope(pTHX);
#define PERL_ARGS_ASSERT_DELETE_EVAL_SCOPE
-PERL_CALLCONV char* Perl_delimcpy(char* to, const char* toend, const char* from, const char* fromend, int delim, I32* retlen);
+PERL_CALLCONV char* Perl_delimcpy(char* to, const char* to_end, const char* from, const char* from_end, const int delim, I32* retlen);
#define PERL_ARGS_ASSERT_DELIMCPY \
- assert(to); assert(toend); assert(from); assert(fromend); assert(retlen)
+ assert(to); assert(to_end); assert(from); assert(from_end); assert(retlen)
PERL_CALLCONV char* Perl_delimcpy_no_escape(char* to, const char* toend, const char* from, const char* fromend, int delim, I32* retlen);
#define PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE \
assert(to); assert(toend); assert(from); assert(fromend); assert(retlen)
diff --git a/util.c b/util.c
index df43cdd237..5d2d1ba17c 100644
--- a/util.c
+++ b/util.c
@@ -533,44 +533,13 @@ Free_t Perl_mfree (Malloc_t where)
#endif
-/* copy a string up to some (non-backslashed) delimiter, if any.
- * With allow_escape, converts \<delimiter> to <delimiter>, while leaves
- * \<non-delimiter> as-is.
- * Returns the position in the src string of the closing delimiter, if
- * any, or returns fromend otherwise.
- * This is the internal implementation for Perl_delimcpy and
- * Perl_delimcpy_no_escape.
- */
-
-static char *
-S_delimcpy_intern(char *to, const char *toend, const char *from,
- const char *fromend, int delim, I32 *retlen,
- const bool allow_escape)
-{
- I32 tolen;
-
- PERL_ARGS_ASSERT_DELIMCPY;
-
- for (tolen = 0; from < fromend; from++, tolen++) {
- if (allow_escape && *from == '\\' && from + 1 < fromend) {
- if (from[1] != delim) {
- if (to < toend)
- *to++ = *from;
- tolen++;
- }
- from++;
- }
- else if (*from == delim)
- break;
- if (to < toend)
- *to++ = *from;
- }
- if (to < toend)
- *to = '\0';
- *retlen = tolen;
- return (char *)from;
-}
-
+/* This is the value stored in *retlen in the two delimcpy routines below when
+ * there wasn't enough room in the destination to store everything it was asked
+ * to. The value is deliberately very large so that hopefully if code uses it
+ * unquestioninly to access memory, it will likely segfault. And it is small
+ * enough that if the caller does some arithmetic on it before accessing, it
+ * won't overflow into a small legal number. */
+#define DELIMCPY_OUT_OF_BOUNDS_RET I32_MAX
/*
=for apidoc_section String Handling
@@ -627,12 +596,170 @@ Perl_delimcpy_no_escape(char *to, const char *toend, const char *from,
return (char *) from + copy_len;
}
+/*
+=for apidoc delimcpy
+
+Copy a source buffer to a destination buffer, stopping at (but not including)
+the first occurrence in the source of an unescaped (defined below) delimiter
+byte, C<delim>. The source is the bytes between S<C<from> and C<from_end> -
+1>. Similarly, the dest is C<to> up to C<to_end>.
+
+The number of bytes copied is written to C<*retlen>.
+
+Returns the position of the first uncopied C<delim> in the C<from> buffer, but
+if there is no such occurrence before C<from_end>, then C<from_end> is returned,
+and the entire buffer S<C<from> .. C<from_end> - 1> is copied.
+
+If there is room in the destination available after the copy, an extra
+terminating safety C<NUL> byte is appended (not included in the returned
+length).
+
+The error case is if the destination buffer is not large enough to accommodate
+everything that should be copied. In this situation, a value larger than
+S<C<to_end> - C<to>> is written to C<*retlen>, and as much of the source as
+fits will be written to the destination. Not having room for the safety C<NUL>
+is not considered an error.
+
+In the following examples, let C<x> be the delimiter, and C<0> represent a C<NUL>
+byte (B<NOT> the digit C<0>). Then we would have
+
+ Source Destination
+ abcxdef abc0
+
+provided the destination buffer is at least 4 bytes long.
+
+An escaped delimiter is one which is immediately preceded by a single
+backslash. Escaped delimiters are copied, and the copy continues past the
+delimiter; the backslash is not copied:
+
+ Source Destination
+ abc\xdef abcxdef0
+
+(provided the destination buffer is at least 8 bytes long).
+
+It's actually somewhat more complicated than that. A sequence of any odd number
+of backslashes escapes the following delimiter, and the copy continues with
+exactly one of the backslashes stripped.
+
+ Source Destination
+ abc\xdef abcxdef0
+ abc\\\xdef abc\\xdef0
+ abc\\\\\xdef abc\\\\xdef0
+
+(as always, if the destination is large enough)
+
+An even number of preceding backslashes does not escape the delimiter, so that
+the copy stops just before it, and includes all the backslashes (no stripping;
+zero is considered even):
+
+ Source Destination
+ abcxdef abc0
+ abc\\xdef abc\\0
+ abc\\\\xdef abc\\\\0
+
+=cut
+*/
+
char *
-Perl_delimcpy(char *to, const char *toend, const char *from, const char *fromend, int delim, I32 *retlen)
+Perl_delimcpy(char *to, const char *to_end,
+ const char *from, const char *from_end,
+ const int delim, I32 *retlen)
{
+ const char * const orig_to = to;
+ Ptrdiff_t copy_len = 0;
+ bool stopped_early = FALSE; /* Ran out of room to copy to */
+
PERL_ARGS_ASSERT_DELIMCPY;
+ assert(from_end >= from);
+ assert(to_end >= to);
+
+ /* Don't use the loop for the trivial case of the first character being the
+ * delimiter; otherwise would have to worry inside the loop about backing
+ * up before the start of 'from' */
+ if (LIKELY(from_end > from && *from != delim)) {
+ while ((copy_len = from_end - from) > 0) {
+ const char * backslash_pos;
+ const char * delim_pos;
+
+ /* Look for the next delimiter in the remaining portion of the
+ * source. A loop invariant is that we already know that the copy
+ * should include *from; this comes from the conditional before the
+ * loop, and how we set things up at the end of each iteration */
+ delim_pos = (const char *) memchr(from + 1, delim, copy_len - 1);
+
+ /* If didn't find it, done looking; set up so copies all of the
+ * source */
+ if (! delim_pos) {
+ copy_len = from_end - from;
+ break;
+ }
+
+ /* Look for a backslash immediately before the delimiter */
+ backslash_pos = delim_pos - 1;
+
+ /* If the delimiter is not escaped, this ends the copy */
+ if (*backslash_pos != '\\') {
+ copy_len = delim_pos - from;
+ break;
+ }
+
+ /* Here there is a backslash just before the delimiter, but it
+ * could be the final backslash in a sequence of them. Backup to
+ * find the first one in it. */
+ do {
+ backslash_pos--;
+ }
+ while (backslash_pos >= from && *backslash_pos == '\\');
+
+ /* If the number of backslashes is even, they just escape one
+ * another, leaving the delimiter unescaped, and stopping the copy.
+ * */
+ if (! ((delim_pos - (backslash_pos + 1)) & 1)) {
+ copy_len = delim_pos - from; /* even, copy up to delimiter */
+ break;
+ }
+
+ /* Here is odd, so the delimiter is escaped. We will try to copy
+ * all but the final backslash in the sequence */
+ copy_len = delim_pos - 1 - from;
- return S_delimcpy_intern(to, toend, from, fromend, delim, retlen, 1);
+ /* Do the copy, but not beyond the end of the destination */
+ if (copy_len >= to_end - to) {
+ Copy(from, to, to_end - to, char);
+ stopped_early = TRUE;
+ to = (char *) to_end;
+ }
+ else {
+ Copy(from, to, copy_len, char);
+ to += copy_len;
+ }
+
+ /* Set up so next iteration will include the delimiter */
+ from = delim_pos;
+ }
+ }
+
+ /* Here, have found the final segment to copy. Copy that, but not beyond
+ * the size of the destination. If not enough room, copy as much as can
+ * fit, and set error return */
+ if (stopped_early || copy_len > to_end - to) {
+ Copy(from, to, to_end - to, char);
+ *retlen = DELIMCPY_OUT_OF_BOUNDS_RET;
+ }
+ else {
+ Copy(from, to, copy_len, char);
+
+ to += copy_len;
+
+ /* If there is extra space available, add a trailing NUL */
+ if (to < to_end) {
+ *to = '\0';
+ }
+
+ *retlen = to - orig_to;
+ }
+
+ return (char *) from + copy_len;
}
/*