summaryrefslogtreecommitdiff
path: root/Python/formatter_unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/formatter_unicode.c')
-rw-r--r--Python/formatter_unicode.c107
1 files changed, 72 insertions, 35 deletions
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index 617d58b207..a2c2b3627c 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type)
{
if (presentation_type > 32 && presentation_type < 128)
PyErr_Format(PyExc_ValueError,
- "Cannot specify ',' with '%c'.",
+ "Cannot specify ',' or '_' with '%c'.",
(char)presentation_type);
else
PyErr_Format(PyExc_ValueError,
- "Cannot specify ',' with '\\x%x'.",
+ "Cannot specify ',' or '_' with '\\x%x'.",
(unsigned int)presentation_type);
}
+static void
+invalid_comma_and_underscore(void)
+{
+ PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
+}
+
/*
get_integer consumes 0 or more decimal digit characters from an
input string, updates *result with the corresponding positive
@@ -48,16 +54,17 @@ invalid_comma_type(Py_UCS4 presentation_type)
returns -1 on error.
*/
static int
-get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
+get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
Py_ssize_t *result)
{
- Py_ssize_t accumulator, digitval;
+ Py_ssize_t accumulator, digitval, pos = *ppos;
int numdigits;
+ int kind = PyUnicode_KIND(str);
+ void *data = PyUnicode_DATA(str);
+
accumulator = numdigits = 0;
- for (;;(*pos)++, numdigits++) {
- if (*pos >= end)
- break;
- digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos));
+ for (; pos < end; pos++, numdigits++) {
+ digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
if (digitval < 0)
break;
/*
@@ -69,10 +76,12 @@ get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,
if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
PyErr_Format(PyExc_ValueError,
"Too many decimal digits in format string");
+ *ppos = pos;
return -1;
}
accumulator = accumulator * 10 + digitval;
}
+ *ppos = pos;
*result = accumulator;
return numdigits;
}
@@ -105,6 +114,14 @@ is_sign_element(Py_UCS4 c)
}
}
+/* Locale type codes. LT_NO_LOCALE must be zero. */
+enum LocaleType {
+ LT_NO_LOCALE = 0,
+ LT_DEFAULT_LOCALE,
+ LT_UNDERSCORE_LOCALE,
+ LT_UNDER_FOUR_LOCALE,
+ LT_CURRENT_LOCALE
+};
typedef struct {
Py_UCS4 fill_char;
@@ -112,7 +129,7 @@ typedef struct {
int alternate;
Py_UCS4 sign;
Py_ssize_t width;
- int thousands_separators;
+ enum LocaleType thousands_separators;
Py_ssize_t precision;
Py_UCS4 type;
} InternalFormatSpec;
@@ -150,9 +167,11 @@ parse_internal_render_format_spec(PyObject *format_spec,
char default_align)
{
Py_ssize_t pos = start;
+ int kind = PyUnicode_KIND(format_spec);
+ void *data = PyUnicode_DATA(format_spec);
/* end-pos is used throughout this code to specify the length of
the input string */
-#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index)
+#define READ_spec(index) PyUnicode_READ(kind, data, index)
Py_ssize_t consumed;
int align_specified = 0;
@@ -163,7 +182,7 @@ parse_internal_render_format_spec(PyObject *format_spec,
format->alternate = 0;
format->sign = '\0';
format->width = -1;
- format->thousands_separators = 0;
+ format->thousands_separators = LT_NO_LOCALE;
format->precision = -1;
format->type = default_type;
@@ -218,9 +237,22 @@ parse_internal_render_format_spec(PyObject *format_spec,
/* Comma signifies add thousands separators */
if (end-pos && READ_spec(pos) == ',') {
- format->thousands_separators = 1;
+ format->thousands_separators = LT_DEFAULT_LOCALE;
+ ++pos;
+ }
+ /* Underscore signifies add thousands separators */
+ if (end-pos && READ_spec(pos) == '_') {
+ if (format->thousands_separators != LT_NO_LOCALE) {
+ invalid_comma_and_underscore();
+ return 0;
+ }
+ format->thousands_separators = LT_UNDERSCORE_LOCALE;
++pos;
}
+ if (end-pos && READ_spec(pos) == ',') {
+ invalid_comma_and_underscore();
+ return 0;
+ }
/* Parse field precision */
if (end-pos && READ_spec(pos) == '.') {
@@ -270,6 +302,16 @@ parse_internal_render_format_spec(PyObject *format_spec,
case '\0':
/* These are allowed. See PEP 378.*/
break;
+ case 'b':
+ case 'o':
+ case 'x':
+ case 'X':
+ /* Underscores are allowed in bin/oct/hex. See PEP 515. */
+ if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
+ /* Every four digits, not every three, in bin/oct/hex. */
+ format->thousands_separators = LT_UNDER_FOUR_LOCALE;
+ break;
+ }
default:
invalid_comma_type(format->type);
return 0;
@@ -346,13 +388,6 @@ fill_padding(_PyUnicodeWriter *writer,
/*********** common routines for numeric formatting *********************/
/************************************************************************/
-/* Locale type codes. */
-enum LocaleType {
- LT_CURRENT_LOCALE,
- LT_DEFAULT_LOCALE,
- LT_NO_LOCALE
-};
-
/* Locale info needed for formatting integers and the part of floats
before and including the decimal. Note that locales only support
8-bit chars, not unicode. */
@@ -404,13 +439,15 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
Py_ssize_t *n_remainder, int *has_decimal)
{
Py_ssize_t remainder;
+ int kind = PyUnicode_KIND(s);
+ void *data = PyUnicode_DATA(s);
- while (pos<end && Py_ISDIGIT(PyUnicode_READ_CHAR(s, pos)))
+ while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
++pos;
remainder = pos;
/* Does remainder start with a decimal point? */
- *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.';
+ *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
/* Skip the decimal point. */
if (*has_decimal)
@@ -658,12 +695,12 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
return 0;
}
-static char no_grouping[1] = {CHAR_MAX};
+static const char no_grouping[1] = {CHAR_MAX};
/* Find the decimal point character(s?), thousands_separator(s?), and
grouping description, either for the current locale if type is
- LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
- none if LT_NO_LOCALE. */
+ LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
+ LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
static int
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
{
@@ -684,13 +721,19 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
break;
}
case LT_DEFAULT_LOCALE:
+ case LT_UNDERSCORE_LOCALE:
+ case LT_UNDER_FOUR_LOCALE:
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
- locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
+ locale_info->thousands_sep = PyUnicode_FromOrdinal(
+ type == LT_DEFAULT_LOCALE ? ',' : '_');
if (!locale_info->decimal_point || !locale_info->thousands_sep)
return -1;
- locale_info->grouping = "\3"; /* Group every 3 characters. The
+ if (type != LT_UNDER_FOUR_LOCALE)
+ locale_info->grouping = "\3"; /* Group every 3 characters. The
(implicit) trailing 0 means repeat
infinitely. */
+ else
+ locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
break;
case LT_NO_LOCALE:
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
@@ -937,9 +980,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
/* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
- (format->thousands_separators ?
- LT_DEFAULT_LOCALE :
- LT_NO_LOCALE),
+ format->thousands_separators,
&locale) == -1)
goto done;
@@ -1084,9 +1125,7 @@ format_float_internal(PyObject *value,
/* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
- (format->thousands_separators ?
- LT_DEFAULT_LOCALE :
- LT_NO_LOCALE),
+ format->thousands_separators,
&locale) == -1)
goto done;
@@ -1262,9 +1301,7 @@ format_complex_internal(PyObject *value,
/* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
- (format->thousands_separators ?
- LT_DEFAULT_LOCALE :
- LT_NO_LOCALE),
+ format->thousands_separators,
&locale) == -1)
goto done;