summaryrefslogtreecommitdiff
path: root/gettext-tools/src/format-python-brace.c
diff options
context:
space:
mode:
Diffstat (limited to 'gettext-tools/src/format-python-brace.c')
-rw-r--r--gettext-tools/src/format-python-brace.c542
1 files changed, 542 insertions, 0 deletions
diff --git a/gettext-tools/src/format-python-brace.c b/gettext-tools/src/format-python-brace.c
new file mode 100644
index 0000000..141d3df
--- /dev/null
+++ b/gettext-tools/src/format-python-brace.c
@@ -0,0 +1,542 @@
+/* Python brace format strings.
+ Copyright (C) 2004, 2006-2007, 2013 Free Software Foundation, Inc.
+ Written by Daiki Ueno <ueno@gnu.org>, 2013.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "format.h"
+#include "c-ctype.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "format-invalid.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+/* Python brace format strings are defined by PEP3101 together with
+ 'format' method of string class.
+ A format string directive here consists of
+ - an opening brace '{',
+ - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
+ - an optional getattr ('.') or getitem ('['..']') operator with
+ an identifier as argument,
+ - an optional format specifier starting with ':', with a
+ (unnested) format string as argument,
+ - a closing brace '}'.
+ Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
+*/
+
+/* One argument collected from a top-level format directive. */
+struct named_arg
+{
+ /* Heap-allocated, NUL-terminated copy of the directive text found
+ between the opening '{' and the closing '}' (see parse_directive).
+ Released by free_named_args. */
+ char *name;
+};
+
+/* Result of parsing one format string. */
+struct spec
+{
+ /* Number of top-level directives seen; incremented once per directive
+ by parse_directive. */
+ unsigned int directives;
+ /* Number of entries of NAMED that are in use. */
+ unsigned int named_arg_count;
+ /* Number of entries NAMED has room for. */
+ unsigned int allocated;
+ /* Growable array of collected arguments; NULL until the first
+ directive has been recorded. */
+ struct named_arg *named;
+};
+
+
+/* Forward declarations of helpers that are defined further down in
+ this file. */
+static bool parse_upto (struct spec *spec, const char **formatp,
+ bool is_toplevel, char terminator,
+ bool translated, char *fdi, char **invalid_reason);
+static void free_named_args (struct spec *spec);
+
+
+/* All the parse_* functions (except parse_upto) follow the same
+ calling convention. FORMATP shall point to the beginning of a token.
+ If parsing succeeds, FORMATP will point to the next character after
+ the token, and true is returned. Otherwise, FORMATP will be
+ unchanged and false is returned. */
+
+/* Recognize an identifier of the form [_A-Za-z][_0-9A-Za-z]* at *FORMATP.
+   On success, advance *FORMATP past the identifier and return true.
+   Otherwise leave *FORMATP alone and return false.
+   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted only to keep the
+   common parse_* signature; this function does not touch them.  */
+static bool
+parse_named_field (struct spec *spec,
+                   const char **formatp, bool translated, char *fdi,
+                   char **invalid_reason)
+{
+  const char *p = *formatp;
+
+  /* The first character must be a letter or an underscore.  */
+  if (!((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z') || *p == '_'))
+    return false;
+
+  /* The remaining characters may additionally be decimal digits.  */
+  for (p++;
+       (*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z')
+       || (*p >= '0' && *p <= '9') || *p == '_';
+       p++)
+    ;
+
+  *formatp = p;
+  return true;
+}
+
+/* Recognize a non-empty run of decimal digits at *FORMATP.
+   On success, advance *FORMATP past the digits and return true.
+   Otherwise leave *FORMATP alone and return false.
+   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted only to keep the
+   common parse_* signature; this function does not touch them.  */
+static bool
+parse_numeric_field (struct spec *spec,
+                     const char **formatp, bool translated, char *fdi,
+                     char **invalid_reason)
+{
+  const char *p = *formatp;
+
+  /* At least one digit is required.  */
+  if (!(*p >= '0' && *p <= '9'))
+    return false;
+
+  while (*p >= '0' && *p <= '9')
+    p++;
+
+  *formatp = p;
+  return true;
+}
+
+/* Parse one directive, or an escaped '{{', starting at *FORMATP, which
+   must point to a '{'.
+
+   On success, advance *FORMATP past the closing '}' (or past the second
+   '{' of an escape) and return true.  When IS_TOPLEVEL is true, the text
+   between the braces is additionally copied into SPEC->named and
+   SPEC->directives is incremented.
+
+   On failure, store a freshly allocated message in *INVALID_REASON, mark
+   the offending position through FDI_SET, leave *FORMATP unchanged and
+   return false.
+
+   IS_TOPLEVEL is false for a directive nested inside a format specifier;
+   such a directive may not itself contain a format specifier.  */
+static bool
+parse_directive (struct spec *spec,
+                 const char **formatp, bool is_toplevel,
+                 bool translated, char *fdi, char **invalid_reason)
+{
+  const char *format = *formatp;
+  /* Not referenced directly in this function; presumably the base that
+     the FDI_SET macro subtracts from its pointer argument.
+     NOTE(review): this is the start of the directive, not of the whole
+     format string -- confirm that this is what FDI_SET expects.  */
+  const char *const format_start = format;
+  const char *name_start;
+  char c;
+
+  /* Step over the opening brace.  A second '{' makes this an escape.  */
+  c = *++format;
+  if (c == '{')
+    {
+      *formatp = ++format;
+      return true;
+    }
+
+  /* Field name: an identifier or a number.  */
+  name_start = format;
+  if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
+      && !parse_numeric_field (spec, &format, translated, fdi,
+                               invalid_reason))
+    {
+      *invalid_reason =
+        xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format);
+      FDI_SET (format, FMTDIR_ERROR);
+      return false;
+    }
+
+  c = *format;
+  if (c == '.')
+    {
+      /* getattr operator: '.' followed by an identifier.  */
+      format++;
+      if (!parse_named_field (spec, &format, translated, fdi,
+                              invalid_reason))
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+      c = *format;
+    }
+  else if (c == '[')
+    {
+      /* getitem operator: '[' identifier-or-number ']'.  */
+      format++;
+      if (!parse_named_field (spec, &format, translated, fdi,
+                              invalid_reason)
+          && !parse_numeric_field (spec, &format, translated, fdi,
+                                   invalid_reason))
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+
+      /* Test for the closing bracket before stepping over it, so that on
+         error FORMAT still points at the offending character and never
+         moves beyond the terminating NUL.  */
+      if (*format != ']')
+        {
+          *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+      c = *++format;
+    }
+
+  if (c == ':')
+    {
+      if (!is_toplevel)
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+
+      /* Format specifiers.  Although a format specifier can be any
+         string in theory, we can only recognize two types of format
+         specifiers below, because otherwise we would need to evaluate
+         Python expressions by ourselves:
+
+           - A nested format directive expanding to the whole string
+           - The Standard Format Specifiers, as described in PEP3101,
+             not including a nested format directive  */
+      format++;
+      if (*format == '{')
+        {
+          /* Nested format directive.  */
+          if (!parse_directive (spec, &format, false, translated, fdi,
+                                invalid_reason))
+            {
+              /* FDI and INVALID_REASON have been set by the recursive
+                 call of parse_directive.  */
+              return false;
+            }
+
+          if (*format != '}')
+            {
+              *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+              FDI_SET (format, FMTDIR_ERROR);
+              return false;
+            }
+        }
+      else
+        {
+          /* Standard format specifiers are in the form:
+             [[fill]align][sign][#][0][minimumwidth][.precision][type]  */
+
+          /* Look ahead two characters to skip [[fill]align].  The second
+             character is only inspected when the first one is not the
+             terminating NUL; otherwise we would read, and possibly skip,
+             beyond the end of the string.  */
+          int c1, c2;
+
+          c1 = format[0];
+          c2 = (c1 != '\0' ? format[1] : '\0');
+
+          if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
+            format += 2;
+          else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
+            format++;
+          if (*format == '+' || *format == '-' || *format == ' ')
+            format++;
+          if (*format == '#')
+            format++;
+          if (*format == '0')
+            format++;
+          while (c_isdigit (*format))
+            format++;
+          if (*format == '.')
+            {
+              format++;
+              while (c_isdigit (*format))
+                format++;
+            }
+          /* Optional presentation type.  's' is the standard type for
+             string arguments.  */
+          switch (*format)
+            {
+            case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
+            case 'n':
+            case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
+            case 's':
+            case '%':
+              format++;
+              break;
+            default:
+              break;
+            }
+          if (*format != '}')
+            {
+              *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+              FDI_SET (format, FMTDIR_ERROR);
+              return false;
+            }
+        }
+      c = *format;
+    }
+
+  if (c != '}')
+    {
+      *invalid_reason =
+        xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives);
+      FDI_SET (format, FMTDIR_ERROR);
+      return false;
+    }
+
+  if (is_toplevel)
+    {
+      char *name;
+      /* Length of everything between the braces: the field name plus any
+         getattr/getitem operator and format specifier.  */
+      size_t n = format - name_start;
+
+      /* name_start - 1 is the opening brace.  */
+      FDI_SET (name_start - 1, FMTDIR_START);
+
+      name = XNMALLOC (n + 1, char);
+      memcpy (name, name_start, n);
+      name[n] = '\0';
+
+      spec->directives++;
+
+      /* Grow the argument array when it is full.  */
+      if (spec->allocated == spec->named_arg_count)
+        {
+          spec->allocated = 2 * spec->allocated + 1;
+          spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
+        }
+      spec->named[spec->named_arg_count].name = name;
+      spec->named_arg_count++;
+
+      FDI_SET (format, FMTDIR_END);
+    }
+
+  /* Step over the closing brace.  */
+  *formatp = ++format;
+  return true;
+}
+
+/* Scan the text at *FORMATP, parsing every directive encountered, until
+   TERMINATOR or the end of the string is reached.
+   On success, set *FORMATP to the position where scanning stopped and
+   return true.  If a directive fails to parse, return false and leave
+   *FORMATP unchanged; parse_directive has then reported the error.
+   IS_TOPLEVEL, TRANSLATED, FDI and INVALID_REASON are passed through to
+   parse_directive.  */
+static bool
+parse_upto (struct spec *spec,
+            const char **formatp, bool is_toplevel, char terminator,
+            bool translated, char *fdi, char **invalid_reason)
+{
+  const char *p = *formatp;
+
+  while (*p != '\0' && *p != terminator)
+    {
+      /* Ordinary characters are skipped one at a time.  */
+      if (*p != '{')
+        {
+          p++;
+          continue;
+        }
+
+      /* On success parse_directive moves P past the directive.  */
+      if (!parse_directive (spec, &p, is_toplevel, translated, fdi,
+                            invalid_reason))
+        return false;
+    }
+
+  *formatp = p;
+  return true;
+}
+
+/* qsort comparison function: order two 'struct named_arg' elements by
+   strcmp of their names.  */
+static int
+named_arg_compare (const void *p1, const void *p2)
+{
+  const struct named_arg *lhs = (const struct named_arg *) p1;
+  const struct named_arg *rhs = (const struct named_arg *) p2;
+
+  return strcmp (lhs->name, rhs->name);
+}
+
+/* Parse FORMAT as a Python brace format string.
+   Return a freshly allocated 'struct spec' whose argument array is
+   sorted by name and free of duplicates, to be released with
+   format_free.  Return NULL if the string is invalid; the parser has
+   then stored an explanation in *INVALID_REASON.
+   TRANSLATED and FDI are passed through to the parser.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  struct spec spec = { 0, 0, 0, NULL };
+  struct spec *result;
+
+  if (!parse_upto (&spec, &format, true, '\0', translated, fdi,
+                   invalid_reason))
+    {
+      free_named_args (&spec);
+      return NULL;
+    }
+
+  /* Sort the named argument array, and eliminate duplicates.  */
+  if (spec.named_arg_count > 1)
+    {
+      unsigned int src;
+      unsigned int dst = 0;
+
+      qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
+             named_arg_compare);
+
+      /* Compact in place: entries below DST are the unique names kept so
+         far, and DST never exceeds SRC.  */
+      for (src = 0; src < spec.named_arg_count; src++)
+        {
+          char *candidate = spec.named[src].name;
+
+          if (dst > 0 && strcmp (candidate, spec.named[dst - 1].name) == 0)
+            {
+              /* Same as the last kept name: drop this copy.  */
+              free (candidate);
+              continue;
+            }
+          spec.named[dst].name = candidate;
+          dst++;
+        }
+      spec.named_arg_count = dst;
+    }
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+}
+
+/* Free every name stored in SPEC->named and then the array itself.
+   Does nothing when no array has been allocated.  SPEC itself is not
+   freed.  */
+static void
+free_named_args (struct spec *spec)
+{
+  struct named_arg *arg;
+  struct named_arg *end;
+
+  if (spec->named == NULL)
+    return;
+
+  end = spec->named + spec->named_arg_count;
+  for (arg = spec->named; arg < end; arg++)
+    free (arg->name);
+  free (spec->named);
+}
+
+/* Release a descriptor returned by format_parse: first its argument
+   names and array, then the descriptor itself.  */
+static void
+format_free (void *descr)
+{
+  struct spec *parsed = descr;
+
+  free_named_args (parsed);
+  free (parsed);
+}
+
+/* Return the directive count recorded in the descriptor DESCR.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  return ((struct spec *) descr)->directives;
+}
+
+/* Compare the arguments of a msgid (MSGID_DESCR) with those of its
+   msgstr (MSGSTR_DESCR).  Both descriptors come from format_parse, so
+   their argument arrays are sorted and free of duplicates.
+
+   When EQUALITY is true, the first argument that occurs in only one of
+   the two strings is reported through ERROR_LOGGER (if non-NULL) and
+   true is returned.  When EQUALITY is false, such arguments are skipped
+   and nothing is reported.
+
+   PRETTY_MSGID and PRETTY_MSGSTR are the names used for the two strings
+   in the error messages.
+   Return true if an error was found, false otherwise.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->named_arg_count + spec2->named_arg_count > 0)
+    {
+      unsigned int i, j;
+      unsigned int n1 = spec1->named_arg_count;
+      unsigned int n2 = spec2->named_arg_count;
+
+      /* Walk both sorted arrays in parallel and look for names that are
+         present in only one of them.  */
+      for (i = 0, j = 0; i < n1 || j < n2; )
+        {
+          /* An exhausted array compares as greater than (spec1) or less
+             than (spec2) whatever is left in the other one.  */
+          int cmp = (i >= n1 ? 1 :
+                     j >= n2 ? -1 :
+                     strcmp (spec1->named[i].name, spec2->named[j].name));
+
+          if (cmp > 0)
+            {
+              /* spec2->named[j] has no counterpart in spec1.  The entry
+                 to report is indexed by J; I may already equal N1 here
+                 and is not a valid index into spec2->named.  */
+              if (equality)
+                {
+                  if (error_logger)
+                    error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
+                                  spec2->named[j].name, pretty_msgid);
+                  err = true;
+                  break;
+                }
+              else
+                j++;
+            }
+          else if (cmp < 0)
+            {
+              /* spec1->named[i] has no counterpart in spec2.  */
+              if (equality)
+                {
+                  if (error_logger)
+                    error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
+                                  spec1->named[i].name, pretty_msgstr);
+                  err = true;
+                  break;
+                }
+              else
+                i++;
+            }
+          else
+            j++, i++;
+        }
+    }
+
+  return err;
+}
+
+
+/* Parser descriptor for Python brace format strings, filled with the
+ functions defined above.
+ NOTE(review): the fourth slot is left NULL; presumably it is an optional
+ hook -- confirm against the definition of struct formatstring_parser. */
+struct formatstring_parser formatstring_python_brace =
+{
+ format_parse,
+ format_free,
+ format_get_number_of_directives,
+ NULL,
+ format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+ format_parse for strings read from standard input. */
+
+#include <stdio.h>
+
+/* Print the descriptor DESCR on standard output: "INVALID" when it is
+   NULL, otherwise the collected argument names, each in single quotes,
+   separated by ", " and enclosed in braces.  */
+static void
+format_print (void *descr)
+{
+  struct spec *parsed = (struct spec *) descr;
+  unsigned int k;
+
+  if (parsed == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("{");
+  for (k = 0; k < parsed->named_arg_count; k++)
+    /* Every name but the first is preceded by a separator.  */
+    printf ("%s'%s'", k > 0 ? ", " : "", parsed->named[k].name);
+  printf ("}");
+}
+
+/* Test driver: read lines from standard input until end of file or a
+   read error; parse each one with format_parse and print the resulting
+   argument list, followed by the failure reason when the line is not a
+   valid format string.  */
+int
+main (void)
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      /* getline returns ssize_t; an int could truncate the length.  */
+      ssize_t line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        {
+          /* getline may have allocated a buffer even though it failed.  */
+          free (line);
+          break;
+        }
+      /* Strip the trailing newline, if any.  */
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */