summaryrefslogtreecommitdiff
path: root/dquote.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-02-13 11:53:50 -0700
committerKarl Williamson <khw@cpan.org>2016-02-18 20:44:02 -0700
commitce54a8b9b1353b2e7e84528e499a996fb0697a95 (patch)
tree0c54b1b6c52121cfbaedad123f8b3060c1170a84 /dquote.c
parent3a3d108484629fe5b421976b8d6fd6f280a1f97a (diff)
downloadperl-ce54a8b9b1353b2e7e84528e499a996fb0697a95.tar.gz
regcomp.c, toke.c: swap functions being inline static
grok_bslash_x() is so large that no compiler will inline it. Move it to dquote.c from dq_inline.c. Conversely, move form_octal_warning() to dq_inline.c. It is so tiny that the function call overhead is scarcely smaller than the function body. This also moves things in embed.fnc so all these functions. are not visible outside the few files they are supposed to be used in.
Diffstat (limited to 'dquote.c')
-rw-r--r--dquote.c134
1 files changed, 113 insertions, 21 deletions
diff --git a/dquote.c b/dquote.c
index 895f17dc9d..e02308e7ac 100644
--- a/dquote.c
+++ b/dquote.c
@@ -8,6 +8,7 @@
#include "EXTERN.h"
#define PERL_IN_DQUOTE_C
#include "perl.h"
+#include "dquote_inline.h"
/* XXX Add documentation after final interface and behavior is decided */
/* May want to show context for error, so would pass S_grok_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
@@ -161,33 +162,124 @@ Perl_grok_bslash_o(pTHX_ char **s, UV *uv, const char** error_msg,
return TRUE;
}
-char*
-Perl_form_short_octal_warning(pTHX_
- const char * const s, /* Points to first non-octal */
- const STRLEN len /* Length of octals string, so
- (s-len) points to first
- octal */
-) {
- /* Return a character string consisting of a warning message for when a
- * string constant in octal is weird, like "\078". */
+bool
+Perl_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg,
+ const bool output_warning, const bool strict,
+ const bool silence_non_portable,
+ const bool UTF)
+{
- const char * sans_leading_zeros = s - len;
+/* Documentation to be supplied when interface nailed down finally
+ * This returns FALSE if there is an error which the caller need not recover
+ * from; otherwise TRUE.
+ * It guarantees that the returned codepoint, *uv, when expressed as
+ * utf8 bytes, would fit within the skipped "\x{...}" bytes.
+ *
+ * On input:
+ * s is the address of a pointer to a NULL terminated string that begins
+ * with 'x', and the previous character was a backslash. At exit, *s
+ * will be advanced to the byte just after those absorbed by this
+ * function. Hence the caller can continue parsing from there. In
+ * the case of an error, this routine has generally positioned *s to
+ * point just to the right of the first bad spot, so that a message
+ * that has a "<--" to mark the spot will be correctly positioned.
+ * uv points to a UV that will hold the output value, valid only if the
+ * return from the function is TRUE
+ * error_msg is a pointer that will be set to an internal buffer giving an
+ * error message upon failure (the return is FALSE). Untouched if
+ * function succeeds
+ * output_warning says whether to output any warning messages, or suppress
+ * them
+ * strict is true if anything out of the ordinary should cause this to
+ * fail instead of warn or be silent. For example, it requires
+ * exactly 2 digits following the \x (when there are no braces).
+ * 3 digits could be a mistake, so is forbidden in this mode.
+ * silence_non_portable is true if to suppress warnings about the code
+ * point returned being too large to fit on all platforms.
+ * UTF is true iff the string *s is encoded in UTF-8.
+ */
+ char* e;
+ STRLEN numbers_len;
+ I32 flags = PERL_SCAN_DISALLOW_PREFIX;
+#ifdef DEBUGGING
+ char *start = *s - 1;
+ assert(*start == '\\');
+#endif
- PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
+ PERL_ARGS_ASSERT_GROK_BSLASH_X;
- assert(*s == '8' || *s == '9');
+ assert(**s == 'x');
+ (*s)++;
- /* Remove the leading zeros, retaining one zero so won't be zero length */
- while (*sans_leading_zeros == '0') sans_leading_zeros++;
- if (sans_leading_zeros == s) {
- sans_leading_zeros--;
+ if (strict || ! output_warning) {
+ flags |= PERL_SCAN_SILENT_ILLDIGIT;
}
- return Perl_form(aTHX_
- "'%.*s' resolved to '\\o{%.*s}%c'",
- (int) (len + 2), s - len - 1,
- (int) (s - sans_leading_zeros), sans_leading_zeros,
- *s);
+ if (**s != '{') {
+ STRLEN len = (strict) ? 3 : 2;
+
+ *uv = grok_hex(*s, &len, &flags, NULL);
+ *s += len;
+ if (strict && len != 2) {
+ if (len < 2) {
+ *s += (UTF) ? UTF8SKIP(*s) : 1;
+ *error_msg = "Non-hex character";
+ }
+ else {
+ *error_msg = "Use \\x{...} for more than two hex characters";
+ }
+ return FALSE;
+ }
+ return TRUE;
+ }
+
+ e = strchr(*s, '}');
+ if (!e) {
+ (*s)++; /* Move past the '{' */
+ while (isXDIGIT(**s)) { /* Position beyond the legal digits */
+ (*s)++;
+ }
+ /* XXX The corresponding message above for \o is just '\\o{'; other
+ * messages for other constructs include the '}', so are inconsistent.
+ */
+ *error_msg = "Missing right brace on \\x{}";
+ return FALSE;
+ }
+
+ (*s)++; /* Point to expected first digit (could be first byte of utf8
+ sequence if not a digit) */
+ numbers_len = e - *s;
+ if (numbers_len == 0) {
+ if (strict) {
+ (*s)++; /* Move past the } */
+ *error_msg = "Number with no digits";
+ return FALSE;
+ }
+ *s = e + 1;
+ *uv = 0;
+ return TRUE;
+ }
+
+ flags |= PERL_SCAN_ALLOW_UNDERSCORES;
+ if (silence_non_portable) {
+ flags |= PERL_SCAN_SILENT_NON_PORTABLE;
+ }
+
+ *uv = grok_hex(*s, &numbers_len, &flags, NULL);
+ /* Note that if has non-hex, will ignore everything starting with that up
+ * to the '}' */
+
+ if (strict && numbers_len != (STRLEN) (e - *s)) {
+ *s += numbers_len;
+ *s += (UTF) ? UTF8SKIP(*s) : 1;
+ *error_msg = "Non-hex character";
+ return FALSE;
+ }
+
+ /* Return past the '}' */
+ *s = e + 1;
+
+ return TRUE;
}
/*