summaryrefslogtreecommitdiff
path: root/dquote.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2015-07-19 16:54:22 +0300
committerJarkko Hietaniemi <jhi@iki.fi>2015-07-22 08:33:24 -0400
commita55c5245146801ac2b2be2914342a6a829670513 (patch)
tree5a49e31da2c0e06bf68f46a0c8b8c6d4cf9f1d4e /dquote.c
parentf7e03a1013a0b759f99885c500f0e89656a992ba (diff)
downloadperl-a55c5245146801ac2b2be2914342a6a829670513.tar.gz
dquote_static.c -> dquote.c
Instead of #include-ing the C file, compile it normally.
Diffstat (limited to 'dquote.c')
-rw-r--r--dquote.c199
1 files changed, 199 insertions, 0 deletions
diff --git a/dquote.c b/dquote.c
new file mode 100644
index 0000000000..9d35f20d97
--- /dev/null
+++ b/dquote.c
@@ -0,0 +1,199 @@
+/* dquote.c
+ *
+ * This file contains functions that are related to
+ * parsing double-quotish expressions.
+ *
+*/
+
+#include "EXTERN.h"
+#define PERL_IN_DQUOTE_C
+#include "perl.h"
+
+/* XXX Add documentation after final interface and behavior is decided */
+/* May want to show context for error, so would pass S_grok_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
+ U8 source = *current;
+*/
+
+char
+Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
+{
+
+ U8 result;
+
+ if (! isPRINT_A(source)) {
+ Perl_croak(aTHX_ "%s",
+ "Character following \"\\c\" must be printable ASCII");
+ }
+ else if (source == '{') {
+ const char control = toCTRL('{');
+ if (isPRINT_A(control)) {
+ /* diag_listed_as: Use "%s" instead of "%s" */
+ Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", control);
+ }
+ else {
+ Perl_croak(aTHX_ "Sequence \"\\c{\" invalid");
+ }
+ }
+
+ result = toCTRL(source);
+ if (output_warning && isPRINT_A(result)) {
+ U8 clearer[3];
+ U8 i = 0;
+ if (! isWORDCHAR(result)) {
+ clearer[i++] = '\\';
+ }
+ clearer[i++] = result;
+ clearer[i++] = '\0';
+
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\"\\c%c\" is more clearly written simply as \"%s\"",
+ source,
+ clearer);
+ }
+
+ return result;
+}
+
+bool
+Perl_grok_bslash_o(pTHX_ char **s, UV *uv, const char** error_msg,
+ const bool output_warning, const bool strict,
+ const bool silence_non_portable,
+ const bool UTF)
+{
+
+/* Documentation to be supplied when interface nailed down finally
+ * This returns FALSE if there is an error which the caller need not recover
+ * from; otherwise TRUE. In either case the caller should look at *len [???].
+ * It guarantees that the returned codepoint, *uv, when expressed as
+ * utf8 bytes, would fit within the skipped "\o{...}" bytes.
+ * On input:
+ * s is the address of a pointer to a NULL terminated string that begins
+ * with 'o', and the previous character was a backslash. At exit, *s
+ * will be advanced to the byte just after those absorbed by this
+ * function. Hence the caller can continue parsing from there. In
+ * the case of an error, this routine has generally positioned *s to
+ * point just to the right of the first bad spot, so that a message
+ * that has a "<--" to mark the spot will be correctly positioned.
+ * uv points to a UV that will hold the output value, valid only if the
+ * return from the function is TRUE
+ * error_msg is a pointer that will be set to an internal buffer giving an
+ * error message upon failure (the return is FALSE). Untouched if
+ * function succeeds
+ * output_warning says whether to output any warning messages, or suppress
+ * them
+ * strict is true if this should fail instead of warn if there are
+ * non-octal digits within the braces
+ * silence_non_portable is true if to suppress warnings about the code
+ * point returned being too large to fit on all platforms.
+ * UTF is true iff the string *s is encoded in UTF-8.
+ */
+ char* e;
+ STRLEN numbers_len;
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX
+ /* XXX Until the message is improved in grok_oct, handle errors
+ * ourselves */
+ | PERL_SCAN_SILENT_ILLDIGIT;
+
+#ifdef DEBUGGING
+ char *start = *s - 1;
+ assert(*start == '\\');
+#endif
+
+ PERL_ARGS_ASSERT_GROK_BSLASH_O;
+
+
+ assert(**s == 'o');
+ (*s)++;
+
+ if (**s != '{') {
+ *error_msg = "Missing braces on \\o{}";
+ return FALSE;
+ }
+
+ e = strchr(*s, '}');
+ if (!e) {
+ (*s)++; /* Move past the '{' */
+ while (isOCTAL(**s)) { /* Position beyond the legal digits */
+ (*s)++;
+ }
+ *error_msg = "Missing right brace on \\o{";
+ return FALSE;
+ }
+
+ (*s)++; /* Point to expected first digit (could be first byte of utf8
+ sequence if not a digit) */
+ numbers_len = e - *s;
+ if (numbers_len == 0) {
+ (*s)++; /* Move past the } */
+ *error_msg = "Number with no digits";
+ return FALSE;
+ }
+
+ if (silence_non_portable) {
+ flags |= PERL_SCAN_SILENT_NON_PORTABLE;
+ }
+
+ *uv = grok_oct(*s, &numbers_len, &flags, NULL);
+ /* Note that if has non-octal, will ignore everything starting with that up
+ * to the '}' */
+
+ if (numbers_len != (STRLEN) (e - *s)) {
+ if (strict) {
+ *s += numbers_len;
+ *s += (UTF) ? UTF8SKIP(*s) : (STRLEN) 1;
+ *error_msg = "Non-octal character";
+ return FALSE;
+ }
+ else if (output_warning) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
+ /* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
+ "Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
+ *(*s + numbers_len),
+ (int) numbers_len,
+ *s);
+ }
+ }
+
+ /* Return past the '}' */
+ *s = e + 1;
+
+ /* guarantee replacing "\o{...}" with utf8 bytes fits within
+ * existing space */
+ assert(OFFUNISKIP(*uv) < *s - start);
+
+ return TRUE;
+}
+
+char*
+Perl_form_short_octal_warning(pTHX_
+ const char * const s, /* Points to first non-octal */
+ const STRLEN len /* Length of octals string, so
+ (s-len) points to first
+ octal */
+) {
+ /* Return a character string consisting of a warning message for when a
+ * string constant in octal is weird, like "\078". */
+
+ const char * sans_leading_zeros = s - len;
+
+ PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
+
+ assert(*s == '8' || *s == '9');
+
+ /* Remove the leading zeros, retaining one zero so won't be zero length */
+ while (*sans_leading_zeros == '0') sans_leading_zeros++;
+ if (sans_leading_zeros == s) {
+ sans_leading_zeros--;
+ }
+
+ return Perl_form(aTHX_
+ "'%.*s' resolved to '\\o{%.*s}%c'",
+ (int) (len + 2), s - len - 1,
+ (int) (s - sans_leading_zeros), sans_leading_zeros,
+ *s);
+}
+
+/*
+ * ex: set ts=8 sts=4 sw=4 et:
+ */