pdfi_read_num: Further minor speedups.

Keep track of the integer that we're reading as we read it, rather than reparsing it from the buffer at the end.
author: Robin Watts <Robin.Watts@artifex.com> 2022-02-14 12:03:52 +0000
committer: Robin Watts <Robin.Watts@artifex.com> 2022-02-15 12:15:37 +0000
commit: 387ed3467ff3e0b5c65aaae0fa8b7975aed5b9ce (patch)
tree: 5bdd96613bded0b293c1bdf2647d6a7f50021a75 /pdf/pdf_int.c
parent: f76b046de50ec1b3b58504d622a0ef59eb41a5a2 (diff)
download: ghostpdl-387ed3467ff3e0b5c65aaae0fa8b7975aed5b9ce.tar.gz
1 files changed, 43 insertions, 50 deletions
diff --git a/pdf/pdf_int.c b/pdf/pdf_int.c
index f9bbe960d..58018f1d0 100644
--- a/pdf/pdf_int.c
+++ b/pdf/pdf_int.c
@@ -178,29 +178,6 @@ static float acrobat_compatible_atof(char *s)
     }
 }
 
-/* Fast but inaccurate atoi, lifted from MuPDF. */
-static int fast_atoi(char *s)
-{
-    int neg = 0;
-    int i = 0;
-
-    while (*s == '-') {
-        neg = 1;
-        ++s;
-    }
-    while (*s == '+') {
-        ++s;
-    }
-
-    while (*s >= '0' && *s <= '9') {
-        /* We deliberately ignore overflow here. */
-        i = i * 10 + (*s - '0');
-        ++s;
-    }
-
-    return neg ? -i : i;
-}
-
 static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_num, uint32_t indirect_gen)
 {
     byte Buffer[256];
@@ -210,7 +187,8 @@ static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_nu
     bool has_exponent = false;
     unsigned short exponent_index = 0;
     pdf_num *num;
-    int code = 0, malformed = false, doubleneg = false, recovered = false;
+    int code = 0, malformed = false, doubleneg = false, recovered = false, negative = false;
+    int int_val = 0;
 
     pdfi_skip_white(ctx, s);
 
@@ -224,19 +202,19 @@ static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_nu
         if (c < 0)
             return_error(gs_error_ioerror);
 
-        Buffer[index] = (byte)c;
-
-        if (iswhite((char)Buffer[index])) {
+        if (iswhite(c)) {
+            Buffer[index] = 0x00;
+            break;
+        } else if (isdelimiter(c)) {
+            pdfi_unread_byte(ctx, s, (byte)c);
             Buffer[index] = 0x00;
             break;
-        } else {
-            if (isdelimiter((char)Buffer[index])) {
-                pdfi_unread_byte(ctx, s, (byte)c);
-                Buffer[index] = 0x00;
-                break;
-            }
         }
-        if (Buffer[index] == '.') {
+        Buffer[index] = (byte)c;
+
+        if (c >= '0' && c <= '9') {
+            int_val = int_val*10 + c - '0';
+        } else if (c == '.') {
             if (has_decimal_point == true) {
                 if (ctx->args.pdfstoponerror)
                     return_error(gs_error_syntaxerror);
@@ -245,7 +223,7 @@ static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_nu
                 has_decimal_point = true;
                 real = true;
             }
-        } else if (Buffer[index] == 'e' || Buffer[index] == 'E') {
+        } else if (c == 'e' || c == 'E') {
             /* TODO: technically scientific notation isn't in PDF spec,
              * but gs seems to accept it, so we should also?
              */
@@ -259,28 +237,43 @@ static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_nu
                 exponent_index = index;
                 real = true;
             }
-        } else if (Buffer[index] == '-' || Buffer[index] == '+') {
+        } else if (c == '-') {
+            /* Any - sign that is not at the start of the string or just after an
+             * exponent indicates a malformed number. */
             if (!(index == 0 || (has_exponent && index == exponent_index+1))) {
+                pdfi_set_error(ctx, 0, NULL, E_PDF_MALFORMEDNUMBER, "pdfi_read_num", NULL);
                 if (ctx->args.pdfstoponerror)
                     return_error(gs_error_syntaxerror);
-                /* Acrobat weirdness. We need to know if a number starts with two - signs
-                 * because Acrobat treats real and integers defined this way differently!
-                 * Double-negated integers are treated as 0, and reals are treated as if
-                 * they had one negative sign. We can't tell whether the number is a real
-                 * or not yet, we do that below.
-                 */
-                pdfi_set_error(ctx, 0, NULL, E_PDF_MALFORMEDNUMBER, "pdfi_read_num", NULL);
-                if (Buffer[index - 1] == '-') {
-                    doubleneg = true;
-                    index -= 1;
+                if (Buffer[index - 1] != '-') {
+                    /* We are parsing a number like 123-56. We should continue parsing, but
+                     * ignore anything from the second -. */
+                    malformed = true;
+                    Buffer[index] = 0;
+                    recovered = true;
                 }
-                else {
+            }
+            if (!has_exponent) {
+                doubleneg = negative;
+                negative = 1;
+            }
+        } else if (c == '+') {
+            if (index == 0 || (has_exponent && index == exponent_index+1)) {
+                /* Just drop the +; it's pointless, and it would get in the way
+                 * of our negation handling for floats. */
+                index--;
+            } else {
+                pdfi_set_error(ctx, 0, NULL, E_PDF_MALFORMEDNUMBER, "pdfi_read_num", NULL);
+                if (ctx->args.pdfstoponerror)
+                    return_error(gs_error_syntaxerror);
+                if (Buffer[index - 1] != '-') {
+                    /* We are parsing a number like 123+56. We should continue parsing, but
+                     * ignore anything from the + onwards. */
                     malformed = true;
-                    Buffer[index] = 0x00;
+                    Buffer[index] = 0;
                     recovered = true;
                 }
             }
-        } else if (Buffer[index] < 0x30 || Buffer[index] > 0x39) {
+        } else {
             pdfi_set_error(ctx, 0, NULL, E_PDF_MISSINGWHITESPACE, "pdfi_read_num", (char *)"Ignoring missing white space while parsing number");
             if (ctx->args.pdfstoponerror)
                 return_error(gs_error_syntaxerror);
@@ -319,7 +312,7 @@ static int pdfi_read_num(pdf_context *ctx, pdf_c_stream *s, uint32_t indirect_nu
     } else if (real) {
         num->value.d = acrobat_compatible_atof((char *)Buffer);
     } else {
-        num->value.i = fast_atoi((char *)Buffer);
+        num->value.i = doubleneg ? 0 : negative ? -int_val : int_val;
     }
     if (ctx->args.pdfdebug) {
         if (real)
author	Robin Watts <Robin.Watts@artifex.com>	2022-02-14 12:03:52 +0000
committer	Robin Watts <Robin.Watts@artifex.com>	2022-02-15 12:15:37 +0000
commit	387ed3467ff3e0b5c65aaae0fa8b7975aed5b9ce (patch)
tree	5bdd96613bded0b293c1bdf2647d6a7f50021a75 /pdf/pdf_int.c
parent	f76b046de50ec1b3b58504d622a0ef59eb41a5a2 (diff)
download	ghostpdl-387ed3467ff3e0b5c65aaae0fa8b7975aed5b9ce.tar.gz