diff options
author | Kevin Ryde <user42@zip.com.au> | 2001-11-15 22:34:54 +0100 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2001-11-15 22:34:54 +0100 |
commit | c273ca3fd5ad12ce3562f4a82ba03c71eb77b2f4 (patch) | |
tree | 3b91b77b87d6695c7335d36595b8e2b15d957e1c /scanf | |
parent | 4e3b3b0cffebcc5b7ec1bd8deab32d393ffb736f (diff) | |
download | gmp-c273ca3fd5ad12ce3562f4a82ba03c71eb77b2f4.tar.gz |
* scanf/doscan.c, scanf/fscanf.c, scanf/fscanffuns.c, scanf/scanf.c,
scanf/sscanf.c, scanf/sscanffuns.c, scanf/vfscanf.c, scanf/vscanf.c,
scanf/vsscanf.c, scanf/Makefile.am, tests/misc/t-scanf.c: New files.
Diffstat (limited to 'scanf')
-rw-r--r-- | scanf/doscan.c | 651 |
1 files changed, 651 insertions, 0 deletions
diff --git a/scanf/doscan.c b/scanf/doscan.c new file mode 100644 index 000000000..fb801d492 --- /dev/null +++ b/scanf/doscan.c @@ -0,0 +1,651 @@ +/* __gmp_doscan -- formatted input internals. + + THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST + CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN + FUTURE GNU MP RELEASES. + +Copyright 2001 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "config.h" + +#if HAVE_STDARG +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +#include <ctype.h> +#include <stddef.h> /* for ptrdiff_t */ +#include <stdio.h> +#include <string.h> + +#if HAVE_LOCALE_H +#include <locale.h> /* for localeconv */ +#endif + +#if HAVE_STDINT_H +#include <stdint.h> /* for intmax_t */ +#endif + +#if HAVE_SYS_TYPES_H +#include <sys/types.h> /* for quad_t */ +#endif + +#include "gmp.h" +#include "gmp-impl.h" + + +/* Change this to "#define TRACE(x) x" for some traces. */ +#define TRACE(x) + + +/* It's necessary to parse up the string to recognise the GMP extra types F, + Q and Z. Other types and conversions are passed across to the standard + sscanf or fscanf via funs->scan, for ease of implemenation. This is + essential in the case of something like glibc %p where the pointer format + isn't actually documented. + + Because funs->scan doesn't get the whole input it can't put the right + values in for %n, so that's handled in __gmp_doscan. Neither sscanf nor + fscanf directly indicate how many characters were read, so an extra %n is + appended to each run for that. For fscanf this merely supports our %n + output, but for sscanf it lets funs->step move us along the input string. + + For standard % conversions, funs->scan is called once for each conversion + and any preceding fixed match text. %% is considered a fixed match for + this purpose. "next" in __gmp_doscan is where a new fixed match plus % + conversion is sought. "next_again" extends the current this_fmt. + + For GMP % conversions, funs->scan is called for any fixed preceding text + to match, plus a space to read leading whitespace. If there's no fixed + text then that space might be all that's passed. This probably isn't a + particularly speedy way to skip whitespace, but it fits easily into the + scheme and saves some complexity in gmpscan. + + vfscanf and vsscanf are only C99 additions, and so might not be available + on old systems. If they were available then multiple standard + conversions could be passed to funs->scan with a va_list for the + parameters. But plain fscanf and sscanf work fine, and parsing one field + at a time shouldn't be too much of a slowdown. + + gmpscan reads a gmp type. It's only used once, but is a separate + subroutine to avoid a big chunk of complicated code in the middle of + __gmp_doscan. With a couple of loopbacks it's possible to share code for + parsing integers, rationals and floats. + + In gmpscan normally one char of lookahead is maintained, but when width + is reached that stops, on the principle that an fgetc/ungetc of a char + past where we're told to stop would be undesirable. "chars" is how many + characters have been read so far, including the current c. When + chars==width and another character is desired then a jump is done to the + "convert" stage. c is invalid and mustn't be unget'ed in this case; + chars is set to width+1 to indicate that. + + gmpscan normally returns the number of characters read. -1 means an + invalid field, like a "-" or "+" alone. -2 means EOF reached before any + matching characters were read. + + Consideration was given to using a separate gmpscan routine for + gmp_fscanf and for gmp_sscanf. The sscanf case could zip across a string + rather than making a function call fun->get per character, and the fscanf + could use getc rather than fgetc too, which might help those systems + where getc is a macro or otherwise inlined. But none of this scanning + and converting will be particularly fast, so it seems better to have just + a common gmpscan to test and maintain. + + Enhancements: + + A way to read the GLIBC printf %a format that we support in gmp_printf + would be good. That would probably be good for plain GLIBC scanf too, so + perhaps we can simply follow its lead if it gets such a feature in the + future. */ + + + +#define GET(c) \ + do { \ + ASSERT (chars <= width); \ + chars++; \ + if (chars > width) \ + goto convert; \ + (c) = (*funs->get) (data); \ + } while (0) + +/* store into "s", extending if necessary */ +#define STORE(c) \ + do { \ + ASSERT (s_upto <= s_alloc); \ + if (s_upto >= s_alloc) \ + { \ + size_t s_alloc_new = s_alloc + S_ALLOC_STEP; \ + s = (*__gmp_reallocate_func) (s, s_alloc, s_alloc_new); \ + s_alloc = s_alloc_new; \ + } \ + s[s_upto++] = c; \ + } while (0) + +#define S_ALLOC_STEP 512 + + +static int +gmpscan (const struct gmp_doscan_funs_t *funs, void *data, + const struct gmp_doscan_params_t *p, void *dst) +{ + int chars, c, base, first, width, seen_point, seen_digit; + size_t s_upto, s_alloc; + char *s; + int invalid = 0; + + TRACE (printf ("gmpscan\n")); + + ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z'); + + c = (*funs->get) (data); + if (c == EOF) + return -2; + + chars = 1; + first = 1; + seen_point = 0; + seen_digit = 0; + width = (p->width == 0 ? INT_MAX-1 : p->width); + base = p->base; + s_alloc = S_ALLOC_STEP; + s = (*__gmp_allocate_func) (s_alloc); + s_upto = 0; + + another: + if (c == '-') + { + STORE (c); + goto get_for_sign; + } + else if (c == '+') + { + /* don't store '+', it's not accepted by mpz_set_str etc */ + get_for_sign: + GET (c); + } + + if (base == 0) + { + base = 10; + if (c == '0') + { + seen_digit = 1; + base = 8; + STORE (c); + GET (c); + if (c == 'x' || c == 'X') + { + base = 16; + store_get_digits: + STORE (c); + GET (c); + } + } + } + + digits: + for (;;) + { + if (base == 16) + { + if (! (isascii (c) && isxdigit (c))) + break; + } + else + { + if (! (isascii (c) && isdigit (c))) + break; + if (base == 8 && (c == '8' || c == '9')) + break; + } + + seen_digit = 1; + STORE (c); + GET (c); + } + + if (first) + { + /* decimal point */ + if (p->type == 'F' && ! seen_point) + { +#if HAVE_LOCALECONV + /* For a multi-character decimal point, if the first character is + present then all of it must be, otherwise the input is + considered invalid. */ + const char *point; + int pc; + point = localeconv()->decimal_point; + pc = *point++; + if (c == pc) + { + for (;;) + { + STORE (c); + GET (c); + pc = *point++; + if (pc == '\0') + break; + if (c != pc) + goto invalid; + } + seen_point = 1; + goto digits; + } +#else + if (c == '.') + { + seen_point = 1; + goto store_get_digits; + } +#endif + } + + /* exponent */ + if (p->type == 'F' && (c == 'e' || c == 'E')) + { + /* must have at least one digit in the mantissa, just an exponent + is not good enough */ + if (! seen_digit) + goto invalid; + + exponent: + first = 0; + STORE (c); + GET (c); + goto another; + } + + /* denominator */ + if (p->type == 'Q' && c == '/') + { + /* must have at least one digit in the numerator */ + if (! seen_digit) + goto invalid; + + /* now look for at least one digit in the denominator */ + seen_digit = 0; + + /* allow the base to be redetermined for "%i" */ + base = p->base; + goto exponent; + } + } + + convert: + if (! seen_digit) + { + invalid: + invalid = 1; + goto done; + } + + if (! p->ignore) + { + STORE ('\0'); + TRACE (printf (" convert \"%s\"\n", s)); + + /* We ought to have parsed out a valid string above, so just test + mpz_set_str etc with an ASSERT. */ + switch (p->type) { + case 'F': + ASSERT (p->base == 10); + ASSERT_NOCARRY (mpf_set_str (dst, s, 10)); + break; + case 'Q': + ASSERT_NOCARRY (mpq_set_str (dst, s, p->base)); + break; + case 'Z': + ASSERT_NOCARRY (mpz_set_str (dst, s, p->base)); + break; + default: + ASSERT (0); + /*FALLTHRU*/ + break; + } + } + + done: + ASSERT (chars <= width+1); + if (chars != width+1) + { + (*funs->unget) (c, data); + TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1)); + } + chars--; + + (*__gmp_free_func) (s, s_alloc); + + if (invalid) + { + TRACE (printf (" invalid\n")); + return -1; + } + + TRACE (printf (" return %d chars (cf width %d)\n", chars, width)); + return chars; +} + + +int +__gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data, + const char *orig_fmt, va_list orig_ap) +{ + struct gmp_doscan_params_t param; + va_list ap; + char *alloc_fmt; + const char *fmt, *this_fmt, *percent, *end_fmt; + size_t orig_fmt_len, alloc_fmt_size, len; + int new_fields, new_chars, gmptype; + char fchar; + int fields = 0; + int chars = 0; + + TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt)); + + /* Don't modify orig_ap, if va_list is actually an array and hence call by + reference. It could be argued that it'd be more efficient to leave + callers to make a copy if they care, but doing so here is going to be a + very small part of the total work, and we may as well keep applications + out of trouble. */ + va_copy (ap, orig_ap); + + /* Parts of the format string are going to be copied so that a " %n" can + be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be + needed if fmt consists of a single "%" specifier, but otherwise is an + overestimate. We're not going to be very fast here, so use + __gmp_allocate_func rather than TMP_ALLOC. */ + orig_fmt_len = strlen (orig_fmt); + alloc_fmt_size = orig_fmt_len + 4; + alloc_fmt = (*__gmp_allocate_func) (alloc_fmt_size); + + fmt = orig_fmt; + end_fmt = orig_fmt + orig_fmt_len; + + for (;;) + { + next: + this_fmt = fmt; + if (fmt == end_fmt) + break; + TRACE (printf (" this_fmt \"%s\"\n", this_fmt)); + + next_again: + param.type = '\0'; + param.base = 10; + param.ignore = 0; + param.width = 0; + + percent = memchr (fmt, '%', end_fmt - fmt); + if (percent == NULL) + { + TRACE (printf (" no more fields\n")); + fmt = end_fmt; + if (fmt != this_fmt) + { + doscan_ignore: + param.ignore = 1; + goto doscan; + } + goto done; + } + TRACE (printf (" percent \"%s\"\n", percent)); + fmt = percent + 1; + + for (;;) + { + ASSERT (fmt <= end_fmt); + fchar = *fmt++; + switch (fchar) { + + case '\0': /* unterminated % sequence */ + ASSERT (0); + goto done; + + case '%': /* literal % */ + goto next_again; + + case '[': /* character range */ + fchar = *fmt++; + if (fchar == '^') + fchar = *fmt++; + /* literal ']' allowed as the first char (possibly after '^') */ + if (fchar == ']') + fchar = *fmt++; + for (;;) + { + ASSERT (fmt <= end_fmt); + if (fchar == '\0') + { + /* unterminated % sequence */ + ASSERT (0); + goto done; + } + if (fchar == ']') + break; + fchar = *fmt++; + } + /*FALLTHRU*/ + case 'c': /* characters */ + case 'd': /* decimal */ + case 'e': /* float */ + case 'E': /* float */ + case 'f': /* float */ + case 'g': /* float */ + case 'G': /* float */ + case 'p': /* pointer */ + case 's': /* string of non-whitespace */ + case 'u': /* decimal */ + doscan: + gmptype = (param.type == 'F' + || param.type == 'Q' + || param.type == 'Z'); + len = (gmptype ? percent : fmt) - this_fmt; + memcpy (alloc_fmt, this_fmt, len); + + /* let funs->scan skip leading whitespace for gmp types */ + if (gmptype) + alloc_fmt[len++] = ' '; + + /* Append a %n to give us a count of characters read. This %n + doesn't count towards the return value from funs->scan. */ + alloc_fmt[len++] = '%'; + alloc_fmt[len++] = 'n'; + alloc_fmt[len] = '\0'; + ASSERT (len < alloc_fmt_size); + + TRACE (printf ("doscan \"%s\"\n", alloc_fmt); + if (funs->scan == (gmp_doscan_scan_t) sscanf) + printf (" s=\"%s\"\n", (const char *) data)); + + new_chars = -1; + if (param.ignore || gmptype) + { + new_fields = (*funs->scan) (data, alloc_fmt, &new_chars); + ASSERT (-1 <= new_fields && new_fields <= 0); + } + else + { + new_fields = (*funs->scan) (data, alloc_fmt, + va_arg (ap, void *), &new_chars); + ASSERT (-1 <= new_fields && new_fields <= 1); + if (new_fields == 0) + goto done; + } + TRACE (printf (" new_chars %d\n", new_chars)); + TRACE (printf (" new_fields %d\n", new_fields)); + + if (new_fields == -1) + { + no_chars_matched: + if (fields == 0) + fields = -1; + goto done; + } + + /* Under param.ignore we don't know if new_fields==0 means a + match (suppressed), or no match. new_chars will only be + written for a successful match. */ + if (new_chars == -1) + goto done; + chars += new_chars; + (*funs->step) (data, new_chars); + + if (gmptype) + { + new_chars = gmpscan (funs, data, ¶m, + param.ignore ? NULL : va_arg (ap, void*)); + if (new_chars == -2) + goto no_chars_matched; + if (new_chars == -1) + goto done; + ASSERT (new_chars >= 0); + chars += new_chars; + } + + if (! param.ignore) + fields++; + goto next; + + case 'a': /* glibc allocate string */ + case '\'': /* glibc digit groupings */ + break; + + case 'F': /* mpf_t */ + case 'j': /* intmax_t */ + case 'L': /* long long */ + case 'q': /* quad_t */ + case 'Q': /* mpq_t */ + case 't': /* ptrdiff_t */ + case 'z': /* size_t */ + case 'Z': /* mpz_t */ + set_type: + param.type = fchar; + break; + + case 'h': /* short or char */ + if (param.type != 'h') + goto set_type; + param.type = 'H'; /* internal code for "hh" */ + break; + + case 'i': + param.base = 0; + goto doscan; + + case 'l': /* long, long long, double or long double */ + if (param.type != 'l') + goto set_type; + param.type = 'L'; /* "ll" means "L" */ + break; + + case 'n': + if (this_fmt != percent) + { + /* match fixed text before %n */ + param.type = '\0'; + fmt = percent; + TRACE (printf (" %%n back to \"%s\"\n", fmt)); + goto doscan_ignore; + } + + if (! param.ignore) + { + void *p; + p = va_arg (ap, void *); + TRACE (printf (" store to %p, ap %p\n", p, ap)); + switch (param.type) { + case '\0': * (int *) p = chars; break; + case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break; + case 'H': * (char *) p = chars; break; + case 'h': * (short *) p = chars; break; +#if HAVE_INTMAX_T + case 'j': * (intmax_t *) p = chars; break; +#else + case 'j': ASSERT_FAIL (intmax_t not available); break; +#endif + case 'l': * (long *) p = chars; break; +#if HAVE_QUAD_T && HAVE_LONG_LONG + case 'q': + ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long)); + /*FALLTHRU*/ +#else + case 'q': ASSERT_FAIL (quad_t not available); break; +#endif +#if HAVE_LONG_LONG + case 'L': * (long long *) p = chars; break; +#else + case 'L': ASSERT_FAIL (long long not available); break; +#endif + case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break; +#if HAVE_PTRDIFF_T + case 't': * (ptrdiff_t *) p = chars; break; +#else + case 't': ASSERT_FAIL (ptrdiff_t not available); break; +#endif +#if HAVE_SIZE_T + case 'z': * (size_t *) p = chars; break; +#else + case 'z': ASSERT_FAIL (size_t not available); break; +#endif + case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break; + default: ASSERT (0); break; + } + } + goto next; + + case 'o': + param.base = 8; + goto doscan; + + case 'x': + case 'X': + param.base = 16; + goto doscan; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + param.width = 0; + do { + param.width = param.width * 10 + (fchar-'0'); + fchar = *fmt++; + } while (isascii (fchar) && isdigit (fchar)); + fmt--; /* unget the non-digit */ + break; + + case '*': + param.ignore = 1; + break; + + default: + /* something invalid in a % sequence */ + ASSERT (0); + goto next; + } + } + } + + done: + (*__gmp_free_func) (alloc_fmt, alloc_fmt_size); + return fields; +} |