1 files changed, 1301 insertions, 0 deletions
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
new file mode 100644
index 0000000000..d4719a578b
--- /dev/null
+++ b/Objects/stringlib/unicode_format.h
@@ -0,0 +1,1301 @@
+/*
+    unicode_format.h -- implementation of str.format().
+*/
+
+/************************************************************************/
+/***********   Global data structures and forward declarations  *********/
+/************************************************************************/
+
+/*
+   A SubString consists of the characters between two string or
+   unicode pointers.
+*/
+typedef struct {
+    PyObject *str; /* borrowed reference */
+    Py_ssize_t start, end;
+} SubString;
+
+
+typedef enum {
+    ANS_INIT,
+    ANS_AUTO,
+    ANS_MANUAL
+} AutoNumberState;   /* Keep track if we're auto-numbering fields */
+
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+    AutoNumberState an_state;
+    int an_field_number;
+} AutoNumber;
+
+
+/* forward declaration for recursion */
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+             int recursion_depth, AutoNumber *auto_number);
+
+
+
+/************************************************************************/
+/**************************  Utility  functions  ************************/
+/************************************************************************/
+
+static void
+AutoNumber_Init(AutoNumber *auto_number)
+{
+    auto_number->an_state = ANS_INIT;
+    auto_number->an_field_number = 0;
+}
+
+/* fill in a SubString from a pointer and length */
+Py_LOCAL_INLINE(void)
+SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
+{
+    str->str = s;
+    str->start = start;
+    str->end = end;
+}
+
+/* return a new string.  if str->str is NULL, return None */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object(SubString *str)
+{
+    if (str->str == NULL) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    return PyUnicode_Substring(str->str, str->start, str->end);
+}
+
+/* return a new string.  if str->str is NULL, return None */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object_or_empty(SubString *str)
+{
+    if (str->str == NULL) {
+        return PyUnicode_New(0, 0);
+    }
+    return SubString_new_object(str);
+}
+
+/* Return 1 if an error has been detected switching between automatic
+   field numbering and manual field specification, else return 0. Set
+   ValueError on error. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+    if (state == ANS_MANUAL) {
+        if (field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "manual field specification to "
+                            "automatic field numbering");
+            return 1;
+        }
+    }
+    else {
+        if (!field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "automatic field numbering to "
+                            "manual field specification");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
+/************************************************************************/
+/***********  Format string parsing -- integers and identifiers *********/
+/************************************************************************/
+
+static Py_ssize_t
+get_integer(const SubString *str)
+{
+    Py_ssize_t accumulator = 0;
+    Py_ssize_t digitval;
+    Py_ssize_t i;
+
+    /* empty string is an error */
+    if (str->start >= str->end)
+        return -1;
+
+    for (i = str->start; i < str->end; i++) {
+        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
+        if (digitval < 0)
+            return -1;
+        /*
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
+        */
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
+            PyErr_Format(PyExc_ValueError,
+                         "Too many decimal digits in format string");
+            return -1;
+        }
+        accumulator = accumulator * 10 + digitval;
+    }
+    return accumulator;
+}
+
+/************************************************************************/
+/******** Functions to get field objects and specification strings ******/
+/************************************************************************/
+
+/* do the equivalent of obj.name */
+static PyObject *
+getattr(PyObject *obj, SubString *name)
+{
+    PyObject *newobj;
+    PyObject *str = SubString_new_object(name);
+    if (str == NULL)
+        return NULL;
+    newobj = PyObject_GetAttr(obj, str);
+    Py_DECREF(str);
+    return newobj;
+}
+
+/* do the equivalent of obj[idx], where obj is a sequence */
+static PyObject *
+getitem_sequence(PyObject *obj, Py_ssize_t idx)
+{
+    return PySequence_GetItem(obj, idx);
+}
+
+/* do the equivalent of obj[idx], where obj is not a sequence */
+static PyObject *
+getitem_idx(PyObject *obj, Py_ssize_t idx)
+{
+    PyObject *newobj;
+    PyObject *idx_obj = PyLong_FromSsize_t(idx);
+    if (idx_obj == NULL)
+        return NULL;
+    newobj = PyObject_GetItem(obj, idx_obj);
+    Py_DECREF(idx_obj);
+    return newobj;
+}
+
+/* do the equivalent of obj[name] */
+static PyObject *
+getitem_str(PyObject *obj, SubString *name)
+{
+    PyObject *newobj;
+    PyObject *str = SubString_new_object(name);
+    if (str == NULL)
+        return NULL;
+    newobj = PyObject_GetItem(obj, str);
+    Py_DECREF(str);
+    return newobj;
+}
+
+typedef struct {
+    /* the entire string we're parsing.  we assume that someone else
+       is managing its lifetime, and that it will exist for the
+       lifetime of the iterator.  can be empty */
+    SubString str;
+
+    /* index to where we are inside field_name */
+    Py_ssize_t index;
+} FieldNameIterator;
+
+
+static int
+FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
+                       Py_ssize_t start, Py_ssize_t end)
+{
+    SubString_init(&self->str, s, start, end);
+    self->index = start;
+    return 1;
+}
+
+static int
+_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
+{
+    Py_UCS4 c;
+
+    name->str = self->str.str;
+    name->start = self->index;
+
+    /* return everything until '.' or '[' */
+    while (self->index < self->str.end) {
+        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
+        switch (c) {
+        case '[':
+        case '.':
+            /* backup so that we this character will be seen next time */
+            self->index--;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+    /* end of string is okay */
+    name->end = self->index;
+    return 1;
+}
+
+static int
+_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
+{
+    int bracket_seen = 0;
+    Py_UCS4 c;
+
+    name->str = self->str.str;
+    name->start = self->index;
+
+    /* return everything until ']' */
+    while (self->index < self->str.end) {
+        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
+        switch (c) {
+        case ']':
+            bracket_seen = 1;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+    /* make sure we ended with a ']' */
+    if (!bracket_seen) {
+        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
+        return 0;
+    }
+
+    /* end of string is okay */
+    /* don't include the ']' */
+    name->end = self->index-1;
+    return 1;
+}
+
+/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
+static int
+FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
+                       Py_ssize_t *name_idx, SubString *name)
+{
+    /* check at end of input */
+    if (self->index >= self->str.end)
+        return 1;
+
+    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
+    case '.':
+        *is_attribute = 1;
+        if (_FieldNameIterator_attr(self, name) == 0)
+            return 0;
+        *name_idx = -1;
+        break;
+    case '[':
+        *is_attribute = 0;
+        if (_FieldNameIterator_item(self, name) == 0)
+            return 0;
+        *name_idx = get_integer(name);
+        if (*name_idx == -1 && PyErr_Occurred())
+            return 0;
+        break;
+    default:
+        /* Invalid character follows ']' */
+        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
+                        "follow ']' in format field specifier");
+        return 0;
+    }
+
+    /* empty string is an error */
+    if (name->start == name->end) {
+        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
+        return 0;
+    }
+
+    return 2;
+}
+
+
+/* input: field_name
+   output: 'first' points to the part before the first '[' or '.'
+           'first_idx' is -1 if 'first' is not an integer, otherwise
+                       it's the value of first converted to an integer
+           'rest' is an iterator to return the rest
+*/
+static int
+field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
+                 Py_ssize_t *first_idx, FieldNameIterator *rest,
+                 AutoNumber *auto_number)
+{
+    Py_UCS4 c;
+    Py_ssize_t i = start;
+    int field_name_is_empty;
+    int using_numeric_index;
+
+    /* find the part up until the first '.' or '[' */
+    while (i < end) {
+        switch (c = PyUnicode_READ_CHAR(str, i++)) {
+        case '[':
+        case '.':
+            /* backup so that we this character is available to the
+               "rest" iterator */
+            i--;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    /* set up the return values */
+    SubString_init(first, str, start, i);
+    FieldNameIterator_init(rest, str, i, end);
+
+    /* see if "first" is an integer, in which case it's used as an index */
+    *first_idx = get_integer(first);
+    if (*first_idx == -1 && PyErr_Occurred())
+        return 0;
+
+    field_name_is_empty = first->start >= first->end;
+
+    /* If the field name is omitted or if we have a numeric index
+       specified, then we're doing numeric indexing into args. */
+    using_numeric_index = field_name_is_empty || *first_idx != -1;
+
+    /* We always get here exactly one time for each field we're
+       processing. And we get here in field order (counting by left
+       braces). So this is the perfect place to handle automatic field
+       numbering if the field name is omitted. */
+
+    /* Check if we need to do the auto-numbering. It's not needed if
+       we're called from string.Format routines, because it's handled
+       in that class by itself. */
+    if (auto_number) {
+        /* Initialize our auto numbering state if this is the first
+           time we're either auto-numbering or manually numbering. */
+        if (auto_number->an_state == ANS_INIT && using_numeric_index)
+            auto_number->an_state = field_name_is_empty ?
+                ANS_AUTO : ANS_MANUAL;
+
+        /* Make sure our state is consistent with what we're doing
+           this time through. Only check if we're using a numeric
+           index. */
+        if (using_numeric_index)
+            if (autonumber_state_error(auto_number->an_state,
+                                       field_name_is_empty))
+                return 0;
+        /* Zero length field means we want to do auto-numbering of the
+           fields. */
+        if (field_name_is_empty)
+            *first_idx = (auto_number->an_field_number)++;
+    }
+
+    return 1;
+}
+
+
+/*
+    get_field_object returns the object inside {}, before the
+    format_spec.  It handles getindex and getattr lookups and consumes
+    the entire input string.
+*/
+static PyObject *
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+                 AutoNumber *auto_number)
+{
+    PyObject *obj = NULL;
+    int ok;
+    int is_attribute;
+    SubString name;
+    SubString first;
+    Py_ssize_t index;
+    FieldNameIterator rest;
+
+    if (!field_name_split(input->str, input->start, input->end, &first,
+                          &index, &rest, auto_number)) {
+        goto error;
+    }
+
+    if (index == -1) {
+        /* look up in kwargs */
+        PyObject *key = SubString_new_object(&first);
+        if (key == NULL)
+            goto error;
+
+        /* Use PyObject_GetItem instead of PyDict_GetItem because this
+           code is no longer just used with kwargs. It might be passed
+           a non-dict when called through format_map. */
+        if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
+            PyErr_SetObject(PyExc_KeyError, key);
+            Py_DECREF(key);
+            goto error;
+        }
+        Py_DECREF(key);
+    }
+    else {
+        /* If args is NULL, we have a format string with a positional field
+           with only kwargs to retrieve it from. This can only happen when
+           used with format_map(), where positional arguments are not
+           allowed. */
+        if (args == NULL) {
+            PyErr_SetString(PyExc_ValueError, "Format string contains "
+                            "positional fields");
+            goto error;
+        }
+
+        /* look up in args */
+        obj = PySequence_GetItem(args, index);
+        if (obj == NULL)
+            goto error;
+    }
+
+    /* iterate over the rest of the field_name */
+    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
+                                        &name)) == 2) {
+        PyObject *tmp;
+
+        if (is_attribute)
+            /* getattr lookup "." */
+            tmp = getattr(obj, &name);
+        else
+            /* getitem lookup "[]" */
+            if (index == -1)
+                tmp = getitem_str(obj, &name);
+            else
+                if (PySequence_Check(obj))
+                    tmp = getitem_sequence(obj, index);
+                else
+                    /* not a sequence */
+                    tmp = getitem_idx(obj, index);
+        if (tmp == NULL)
+            goto error;
+
+        /* assign to obj */
+        Py_DECREF(obj);
+        obj = tmp;
+    }
+    /* end of iterator, this is the non-error case */
+    if (ok == 1)
+        return obj;
+error:
+    Py_XDECREF(obj);
+    return NULL;
+}
+
+/************************************************************************/
+/*****************  Field rendering functions  **************************/
+/************************************************************************/
+
+/*
+    render_field() is the main function in this section.  It takes the
+    field object and field specification string generated by
+    get_field_and_spec, and renders the field into the output string.
+
+    render_field calls fieldobj.__format__(format_spec) method, and
+    appends to the output.
+*/
+static int
+render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
+{
+    int ok = 0;
+    PyObject *result = NULL;
+    PyObject *format_spec_object = NULL;
+    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+    int err;
+
+    /* If we know the type exactly, skip the lookup of __format__ and just
+       call the formatter directly. */
+    if (PyUnicode_CheckExact(fieldobj))
+        formatter = _PyUnicode_FormatAdvancedWriter;
+    else if (PyLong_CheckExact(fieldobj))
+        formatter = _PyLong_FormatAdvancedWriter;
+    else if (PyFloat_CheckExact(fieldobj))
+        formatter = _PyFloat_FormatAdvancedWriter;
+    else if (PyComplex_CheckExact(fieldobj))
+        formatter = _PyComplex_FormatAdvancedWriter;
+
+    if (formatter) {
+        /* we know exactly which formatter will be called when __format__ is
+           looked up, so call it directly, instead. */
+        err = formatter(writer, fieldobj, format_spec->str,
+                        format_spec->start, format_spec->end);
+        return (err == 0);
+    }
+    else {
+        /* We need to create an object out of the pointers we have, because
+           __format__ takes a string/unicode object for format_spec. */
+        if (format_spec->str)
+            format_spec_object = PyUnicode_Substring(format_spec->str,
+                                                     format_spec->start,
+                                                     format_spec->end);
+        else
+            format_spec_object = PyUnicode_New(0, 0);
+        if (format_spec_object == NULL)
+            goto done;
+
+        result = PyObject_Format(fieldobj, format_spec_object);
+    }
+    if (result == NULL)
+        goto done;
+
+    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
+        goto done;
+    ok = 1;
+
+done:
+    Py_XDECREF(format_spec_object);
+    Py_XDECREF(result);
+    return ok;
+}
+
+static int
+parse_field(SubString *str, SubString *field_name, SubString *format_spec,
+            Py_UCS4 *conversion)
+{
+    /* Note this function works if the field name is zero length,
+       which is good.  Zero length field names are handled later, in
+       field_name_split. */
+
+    Py_UCS4 c = 0;
+
+    /* initialize these, as they may be empty */
+    *conversion = '\0';
+    SubString_init(format_spec, NULL, 0, 0);
+
+    /* Search for the field name.  it's terminated by the end of
+       the string, or a ':' or '!' */
+    field_name->str = str->str;
+    field_name->start = str->start;
+    while (str->start < str->end) {
+        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+        case ':':
+        case '!':
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    if (c == '!' || c == ':') {
+        /* we have a format specifier and/or a conversion */
+        /* don't include the last character */
+        field_name->end = str->start-1;
+
+        /* the format specifier is the rest of the string */
+        format_spec->str = str->str;
+        format_spec->start = str->start;
+        format_spec->end = str->end;
+
+        /* see if there's a conversion specifier */
+        if (c == '!') {
+            /* there must be another character present */
+            if (format_spec->start >= format_spec->end) {
+                PyErr_SetString(PyExc_ValueError,
+                                "end of format while looking for conversion "
+                                "specifier");
+                return 0;
+            }
+            *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+
+            /* if there is another character, it must be a colon */
+            if (format_spec->start < format_spec->end) {
+                c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+                if (c != ':') {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "expected ':' after format specifier");
+                    return 0;
+                }
+            }
+        }
+    }
+    else
+        /* end of string, there's no format_spec or conversion */
+        field_name->end = str->start;
+
+    return 1;
+}
+
+/************************************************************************/
+/******* Output string allocation and escape-to-markup processing  ******/
+/************************************************************************/
+
+/* MarkupIterator breaks the string into pieces of either literal
+   text, or things inside {} that need to be marked up.  it is
+   designed to make it easy to wrap a Python iterator around it, for
+   use with the Formatter class */
+
+typedef struct {
+    SubString str;
+} MarkupIterator;
+
+static int
+MarkupIterator_init(MarkupIterator *self, PyObject *str,
+                    Py_ssize_t start, Py_ssize_t end)
+{
+    SubString_init(&self->str, str, start, end);
+    return 1;
+}
+
+/* returns 0 on error, 1 on non-error termination, and 2 if it got a
+   string (or something to be expanded) */
+static int
+MarkupIterator_next(MarkupIterator *self, SubString *literal,
+                    int *field_present, SubString *field_name,
+                    SubString *format_spec, Py_UCS4 *conversion,
+                    int *format_spec_needs_expanding)
+{
+    int at_end, hit_format_spec;
+    Py_UCS4 c = 0;
+    Py_ssize_t start;
+    int count;
+    Py_ssize_t len;
+    int markup_follows = 0;
+
+    /* initialize all of the output variables */
+    SubString_init(literal, NULL, 0, 0);
+    SubString_init(field_name, NULL, 0, 0);
+    SubString_init(format_spec, NULL, 0, 0);
+    *conversion = '\0';
+    *format_spec_needs_expanding = 0;
+    *field_present = 0;
+
+    /* No more input, end of iterator.  This is the normal exit
+       path. */
+    if (self->str.start >= self->str.end)
+        return 1;
+
+    start = self->str.start;
+
+    /* First read any literal text. Read until the end of string, an
+       escaped '{' or '}', or an unescaped '{'.  In order to never
+       allocate memory and so I can just pass pointers around, if
+       there's an escaped '{' or '}' then we'll return the literal
+       including the brace, but no format object.  The next time
+       through, we'll return the rest of the literal, skipping past
+       the second consecutive brace. */
+    while (self->str.start < self->str.end) {
+        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
+        case '{':
+        case '}':
+            markup_follows = 1;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    at_end = self->str.start >= self->str.end;
+    len = self->str.start - start;
+
+    if ((c == '}') && (at_end ||
+                       (c != PyUnicode_READ_CHAR(self->str.str,
+                                                 self->str.start)))) {
+        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
+                        "in format string");
+        return 0;
+    }
+    if (at_end && c == '{') {
+        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
+                        "in format string");
+        return 0;
+    }
+    if (!at_end) {
+        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
+            /* escaped } or {, skip it in the input.  there is no
+               markup object following us, just this literal text */
+            self->str.start++;
+            markup_follows = 0;
+        }
+        else
+            len--;
+    }
+
+    /* record the literal text */
+    literal->str = self->str.str;
+    literal->start = start;
+    literal->end = start + len;
+
+    if (!markup_follows)
+        return 2;
+
+    /* this is markup, find the end of the string by counting nested
+       braces.  note that this prohibits escaped braces, so that
+       format_specs cannot have braces in them. */
+    *field_present = 1;
+    count = 1;
+
+    start = self->str.start;
+
+    /* we know we can't have a zero length string, so don't worry
+       about that case */
+    hit_format_spec = 0;
+    while (self->str.start < self->str.end) {
+        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
+        case ':':
+            if (!hit_format_spec) {
+                count = 1;
+                hit_format_spec = 1;
+            }
+            break;
+        case '{':
+            /* the format spec needs to be recursively expanded.
+               this is an optimization, and not strictly needed */
+            if (hit_format_spec)
+                *format_spec_needs_expanding = 1;
+            count++;
+            break;
+        case '}':
+            count--;
+            if (count <= 0) {
+                /* we're done.  parse and get out */
+                SubString s;
+
+                SubString_init(&s, self->str.str, start, self->str.start - 1);
+                if (parse_field(&s, field_name, format_spec, conversion) == 0)
+                    return 0;
+
+                /* success */
+                return 2;
+            }
+            break;
+        }
+    }
+
+    /* end of string while searching for matching '}' */
+    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
+    return 0;
+}
+
+
+/* do the !r or !s conversion on obj */
+static PyObject *
+do_conversion(PyObject *obj, Py_UCS4 conversion)
+{
+    /* XXX in pre-3.0, do we need to convert this to unicode, since it
+       might have returned a string? */
+    switch (conversion) {
+    case 'r':
+        return PyObject_Repr(obj);
+    case 's':
+        return PyObject_Str(obj);
+    case 'a':
+        return PyObject_ASCII(obj);
+    default:
+        if (conversion > 32 && conversion < 127) {
+                /* It's the ASCII subrange; casting to char is safe
+                   (assuming the execution character set is an ASCII
+                   superset). */
+                PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier %c",
+                     (char)conversion);
+        } else
+                PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier \\x%x",
+                     (unsigned int)conversion);
+        return NULL;
+    }
+}
+
+/* given:
+
+   {field_name!conversion:format_spec}
+
+   compute the result and write it to output.
+   format_spec_needs_expanding is an optimization.  if it's false,
+   just output the string directly, otherwise recursively expand the
+   format_spec string.
+
+   field_name is allowed to be zero length, in which case we
+   are doing auto field numbering.
+*/
+
+static int
+output_markup(SubString *field_name, SubString *format_spec,
+              int format_spec_needs_expanding, Py_UCS4 conversion,
+              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
+              int recursion_depth, AutoNumber *auto_number)
+{
+    PyObject *tmp = NULL;
+    PyObject *fieldobj = NULL;
+    SubString expanded_format_spec;
+    SubString *actual_format_spec;
+    int result = 0;
+
+    /* convert field_name to an object */
+    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
+    if (fieldobj == NULL)
+        goto done;
+
+    if (conversion != '\0') {
+        tmp = do_conversion(fieldobj, conversion);
+        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+            goto done;
+
+        /* do the assignment, transferring ownership: fieldobj = tmp */
+        Py_DECREF(fieldobj);
+        fieldobj = tmp;
+        tmp = NULL;
+    }
+
+    /* if needed, recurively compute the format_spec */
+    if (format_spec_needs_expanding) {
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+                           auto_number);
+        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+            goto done;
+
+        /* note that in the case we're expanding the format string,
+           tmp must be kept around until after the call to
+           render_field. */
+        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
+        actual_format_spec = &expanded_format_spec;
+    }
+    else
+        actual_format_spec = format_spec;
+
+    if (render_field(fieldobj, actual_format_spec, writer) == 0)
+        goto done;
+
+    result = 1;
+
+done:
+    Py_XDECREF(fieldobj);
+    Py_XDECREF(tmp);
+
+    return result;
+}
+
+/*
+    do_markup is the top-level loop for the format() method.  It
+    searches through the format string for escapes to markup codes, and
+    calls other functions to move non-markup text to the output,
+    and to perform the markup to the output.
+*/
+static int
+do_markup(SubString *input, PyObject *args, PyObject *kwargs,
+          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
+{
+    MarkupIterator iter;
+    int format_spec_needs_expanding;
+    int result;
+    int field_present;
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UCS4 conversion, maxchar;
+    Py_ssize_t sublen;
+    int err;
+
+    MarkupIterator_init(&iter, input->str, input->start, input->end);
+    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
+                                         &field_name, &format_spec,
+                                         &conversion,
+                                         &format_spec_needs_expanding)) == 2) {
+        sublen = literal.end - literal.start;
+        if (sublen) {
+            maxchar = _PyUnicode_FindMaxChar(literal.str,
+                                             literal.start, literal.end);
+            err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
+            if (err == -1)
+                return 0;
+            _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                          literal.str, literal.start, sublen);
+            writer->pos += sublen;
+        }
+
+        if (field_present) {
+            if (iter.str.start == iter.str.end)
+                writer->overallocate = 0;
+            if (!output_markup(&field_name, &format_spec,
+                               format_spec_needs_expanding, conversion, writer,
+                               args, kwargs, recursion_depth, auto_number))
+                return 0;
+        }
+    }
+    return result;
+}
+
+
+/*
+    build_string allocates the output string and then
+    calls do_markup to do the heavy lifting.
+*/
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+             int recursion_depth, AutoNumber *auto_number)
+{
+    _PyUnicodeWriter writer;
+    Py_ssize_t minlen;
+
+    /* check the recursion level */
+    if (recursion_depth <= 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Max string recursion exceeded");
+        return NULL;
+    }
+
+    minlen = PyUnicode_GET_LENGTH(input->str) + 100;
+    _PyUnicodeWriter_Init(&writer, minlen);
+
+    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
+                   auto_number)) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+
+    return _PyUnicodeWriter_Finish(&writer);
+}
+
+/************************************************************************/
+/*********** main routine ***********************************************/
+/************************************************************************/
+
+/* this is the main entry point */
+static PyObject *
+do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    SubString input;
+
+    /* PEP 3101 says only 2 levels, so that
+       "{0:{1}}".format('abc', 's')            # works
+       "{0:{1:{2}}}".format('abc', 's', '')    # fails
+    */
+    int recursion_depth = 2;
+
+    AutoNumber auto_number;
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    AutoNumber_Init(&auto_number);
+    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
+    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
+}
+
+static PyObject *
+do_string_format_map(PyObject *self, PyObject *obj)
+{
+    return do_string_format(self, NULL, obj);
+}
+
+
+/************************************************************************/
+/*********** formatteriterator ******************************************/
+/************************************************************************/
+
+/* This is used to implement string.Formatter.vparse().  It exists so
+   Formatter can share code with the built in unicode.format() method.
+   It's really just a wrapper around MarkupIterator that is callable
+   from Python. */
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *str;
+    MarkupIterator it_markup;
+} formatteriterobject;
+
+static void
+formatteriter_dealloc(formatteriterobject *it)
+{
+    Py_XDECREF(it->str);
+    PyObject_FREE(it);
+}
+
+/* returns a tuple:
+   (literal, field_name, format_spec, conversion)
+
+   literal is any literal text to output.  might be zero length
+   field_name is the string before the ':'.  might be None
+   format_spec is the string after the ':'.  mibht be None
+   conversion is either None, or the string after the '!'
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
+{
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UCS4 conversion;
+    int format_spec_needs_expanding;
+    int field_present;
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+                                     &field_name, &format_spec, &conversion,
+                                     &format_spec_needs_expanding);
+
+    /* all of the SubString objects point into it->str, so no
+       memory management needs to be done on them */
+    assert(0 <= result && result <= 2);
+    if (result == 0 || result == 1)
+        /* if 0, error has already been set, if 1, iterator is empty */
+        return NULL;
+    else {
+        PyObject *literal_str = NULL;
+        PyObject *field_name_str = NULL;
+        PyObject *format_spec_str = NULL;
+        PyObject *conversion_str = NULL;
+        PyObject *tuple = NULL;
+
+        literal_str = SubString_new_object(&literal);
+        if (literal_str == NULL)
+            goto done;
+
+        field_name_str = SubString_new_object(&field_name);
+        if (field_name_str == NULL)
+            goto done;
+
+        /* if field_name is non-zero length, return a string for
+           format_spec (even if zero length), else return None */
+        format_spec_str = (field_present ?
+                           SubString_new_object_or_empty :
+                           SubString_new_object)(&format_spec);
+        if (format_spec_str == NULL)
+            goto done;
+
+        /* if the conversion is not specified, return a None,
+           otherwise create a one length string with the conversion
+           character */
+        if (conversion == '\0') {
+            conversion_str = Py_None;
+            Py_INCREF(conversion_str);
+        }
+        else
+            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+                                                       &conversion, 1);
+        if (conversion_str == NULL)
+            goto done;
+
+        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
+                             conversion_str);
+    done:
+        Py_XDECREF(literal_str);
+        Py_XDECREF(field_name_str);
+        Py_XDECREF(format_spec_str);
+        Py_XDECREF(conversion_str);
+        return tuple;
+    }
+}
+
+static PyMethodDef formatteriter_methods[] = {
+    {NULL,              NULL}           /* sentinel */
+};
+
+static PyTypeObject PyFormatterIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "formatteriterator",                /* tp_name */
+    sizeof(formatteriterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)formatteriter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_reserved */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)formatteriter_next,   /* tp_iternext */
+    formatteriter_methods,              /* tp_methods */
+    0,
+};
+
+/* unicode_formatter_parser is used to implement
+   string.Formatter.vformat.  it parses a string and returns tuples
+   describing the parsed elements.  It's a wrapper around
+   stringlib/string_format.h's MarkupIterator */
+static PyObject *
+formatter_parser(PyObject *ignored, PyObject *self)
+{
+    formatteriterobject *it;
+
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
+    if (it == NULL)
+        return NULL;
+
+    /* take ownership, give the object to the iterator */
+    Py_INCREF(self);
+    it->str = self;
+
+    /* initialize the contained MarkupIterator */
+    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
+    return (PyObject *)it;
+}
+
+
+/************************************************************************/
+/*********** fieldnameiterator ******************************************/
+/************************************************************************/
+
+
+/* This is used to implement string.Formatter.vparse().  It parses the
+   field name into attribute and item values.  It's a Python-callable
+   wrapper around FieldNameIterator */
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *str;
+    FieldNameIterator it_field;
+} fieldnameiterobject;
+
+static void
+fieldnameiter_dealloc(fieldnameiterobject *it)
+{
+    Py_XDECREF(it->str);
+    PyObject_FREE(it);
+}
+
+/* returns a tuple:
+   (is_attr, value)
+   is_attr is true if we used attribute syntax (e.g., '.foo')
+              false if we used index syntax (e.g., '[foo]')
+   value is an integer or string
+*/
+static PyObject *
+fieldnameiter_next(fieldnameiterobject *it)
+{
+    int result;
+    int is_attr;
+    Py_ssize_t idx;
+    SubString name;
+
+    result = FieldNameIterator_next(&it->it_field, &is_attr,
+                                    &idx, &name);
+    if (result == 0 || result == 1)
+        /* if 0, error has already been set, if 1, iterator is empty */
+        return NULL;
+    else {
+        PyObject* result = NULL;
+        PyObject* is_attr_obj = NULL;
+        PyObject* obj = NULL;
+
+        is_attr_obj = PyBool_FromLong(is_attr);
+        if (is_attr_obj == NULL)
+            goto done;
+
+        /* either an integer or a string */
+        if (idx != -1)
+            obj = PyLong_FromSsize_t(idx);
+        else
+            obj = SubString_new_object(&name);
+        if (obj == NULL)
+            goto done;
+
+        /* return a tuple of values */
+        result = PyTuple_Pack(2, is_attr_obj, obj);
+
+    done:
+        Py_XDECREF(is_attr_obj);
+        Py_XDECREF(obj);
+        return result;
+    }
+}
+
+static PyMethodDef fieldnameiter_methods[] = {
+    {NULL,              NULL}           /* sentinel */
+};
+
+static PyTypeObject PyFieldNameIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "fieldnameiterator",                /* tp_name */
+    sizeof(fieldnameiterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_reserved */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
+    fieldnameiter_methods,              /* tp_methods */
+    0};
+
+/* unicode_formatter_field_name_split is used to implement
+   string.Formatter.vformat.  it takes an PEP 3101 "field name", and
+   returns a tuple of (first, rest): "first", the part before the
+   first '.' or '['; and "rest", an iterator for the rest of the field
+   name.  it's a wrapper around stringlib/string_format.h's
+   field_name_split.  The iterator it returns is a
+   FieldNameIterator */
+static PyObject *
+formatter_field_name_split(PyObject *ignored, PyObject *self)
+{
+    SubString first;
+    Py_ssize_t first_idx;
+    fieldnameiterobject *it;
+
+    PyObject *first_obj = NULL;
+    PyObject *result = NULL;
+
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
+    if (it == NULL)
+        return NULL;
+
+    /* take ownership, give the object to the iterator.  this is
+       just to keep the field_name alive */
+    Py_INCREF(self);
+    it->str = self;
+
+    /* Pass in auto_number = NULL. We'll return an empty string for
+       first_obj in that case. */
+    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
+                          &first, &first_idx, &it->it_field, NULL))
+        goto done;
+
+    /* first becomes an integer, if possible; else a string */
+    if (first_idx != -1)
+        first_obj = PyLong_FromSsize_t(first_idx);
+    else
+        /* convert "first" into a string object */
+        first_obj = SubString_new_object(&first);
+    if (first_obj == NULL)
+        goto done;
+
+    /* return a tuple of values */
+    result = PyTuple_Pack(2, first_obj, it);
+
+done:
+    Py_XDECREF(it);
+    Py_XDECREF(first_obj);
+    return result;
+}