merge 3.3 (#23367)

author: Benjamin Peterson <benjamin@python.org> 2015-03-02 11:18:40 -0500
committer: Benjamin Peterson <benjamin@python.org> 2015-03-02 11:18:40 -0500
commit: d6407b4e5f83ab386a1722de125f0fdc0d181d94 (patch)
tree: ee24f62a6af8962ebb1c11b102afee10ba34010c /Modules/unicodedata.c
parent: a7f410fcb001feedbe9d66a2b67fc9cae810ca63 (diff)
parent: 004c700927e153000f8a0931d304ac602956a50f (diff)
download: cpython-d6407b4e5f83ab386a1722de125f0fdc0d181d94.tar.gz
1 files changed, 75 insertions, 26 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 9fb1191fc5..507cef3ba1 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -13,10 +13,18 @@
 
    ------------------------------------------------------------------------ */
 
+#define PY_SSIZE_T_CLEAN
+
 #include "Python.h"
 #include "ucnhash.h"
 #include "structmember.h"
 
+/*[clinic input]
+module unicodedata
+class unicodedata.UCD 'PreviousDBVersion *' '&UCD_Type'
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6dac153082d150bc]*/
+
 /* character properties */
 
 typedef struct {
@@ -107,25 +115,63 @@ static Py_UCS4 getuchar(PyUnicodeObject *obj)
 
 /* --- Module API --------------------------------------------------------- */
 
-PyDoc_STRVAR(unicodedata_decimal__doc__,
-"decimal(unichr[, default])\n\
-\n\
-Returns the decimal value assigned to the Unicode character unichr\n\
-as integer. If no such value is defined, default is returned, or, if\n\
-not given, ValueError is raised.");
+/*[clinic input]
+
+unicodedata.UCD.decimal
+
+    unichr: object(type='PyUnicodeObject *', subclass_of='&PyUnicode_Type')
+    default: object=NULL
+    /
+
+Converts a Unicode character into its equivalent decimal value.
+
+Returns the decimal value assigned to the Unicode character unichr
+as integer. If no such value is defined, default is returned, or, if
+not given, ValueError is raised.
+[clinic start generated code]*/
+
+PyDoc_STRVAR(unicodedata_UCD_decimal__doc__,
+"decimal($self, unichr, default=None, /)\n"
+"--\n"
+"\n"
+"Converts a Unicode character into its equivalent decimal value.\n"
+"\n"
+"Returns the decimal value assigned to the Unicode character unichr\n"
+"as integer. If no such value is defined, default is returned, or, if\n"
+"not given, ValueError is raised.");
+
+#define UNICODEDATA_UCD_DECIMAL_METHODDEF    \
+    {"decimal", (PyCFunction)unicodedata_UCD_decimal, METH_VARARGS, unicodedata_UCD_decimal__doc__},
 
 static PyObject *
-unicodedata_decimal(PyObject *self, PyObject *args)
+unicodedata_UCD_decimal_impl(PreviousDBVersion *self, PyUnicodeObject *unichr, PyObject *default_value);
+
+static PyObject *
+unicodedata_UCD_decimal(PreviousDBVersion *self, PyObject *args)
+{
+    PyObject *return_value = NULL;
+    PyUnicodeObject *unichr;
+    PyObject *default_value = NULL;
+
+    if (!PyArg_ParseTuple(args,
+        "O!|O:decimal",
+        &PyUnicode_Type, &unichr, &default_value))
+        goto exit;
+    return_value = unicodedata_UCD_decimal_impl(self, unichr, default_value);
+
+exit:
+    return return_value;
+}
+
+static PyObject *
+unicodedata_UCD_decimal_impl(PreviousDBVersion *self, PyUnicodeObject *unichr, PyObject *default_value)
+/*[clinic end generated code: output=8689669896d293df input=c25c9d2b4de076b1]*/
 {
-    PyUnicodeObject *v;
-    PyObject *defobj = NULL;
     int have_old = 0;
     long rc;
     Py_UCS4 c;
 
-    if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj))
-        return NULL;
-    c = getuchar(v);
+    c = getuchar(unichr);
     if (c == (Py_UCS4)-1)
         return NULL;
 
@@ -145,14 +191,14 @@ unicodedata_decimal(PyObject *self, PyObject *args)
     if (!have_old)
         rc = Py_UNICODE_TODECIMAL(c);
     if (rc < 0) {
-        if (defobj == NULL) {
+        if (default_value == NULL) {
             PyErr_SetString(PyExc_ValueError,
                             "not a decimal");
             return NULL;
         }
         else {
-            Py_INCREF(defobj);
-            return defobj;
+            Py_INCREF(default_value);
+            return default_value;
         }
     }
     return PyLong_FromLong(rc);
@@ -937,7 +983,7 @@ is_unified_ideograph(Py_UCS4 code)
         (0x2B740 <= code && code <= 0x2B81D);   /* CJK Ideograph Extension D */
 }
 
-/* macros used to determine if the given codepoint is in the PUA range that
+/* macros used to determine if the given code point is in the PUA range that
  * we are using to store aliases and named sequences */
 #define IS_ALIAS(cp) ((cp >= aliases_start) && (cp < aliases_end))
 #define IS_NAMED_SEQ(cp) ((cp >= named_sequences_start) && \
@@ -947,7 +993,7 @@ static int
 _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
            int with_alias_and_seq)
 {
-    /* Find the name associated with the given codepoint.
+    /* Find the name associated with the given code point.
      * If with_alias_and_seq is 1, check for names in the Private Use Area 15
      * that we are using for aliases and named sequences. */
     int offset;
@@ -958,7 +1004,7 @@ _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
     if (code >= 0x110000)
         return 0;
 
-    /* XXX should we just skip all the codepoints in the PUAs here? */
+    /* XXX should we just skip all the code points in the PUAs here? */
     if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code)))
         return 0;
 
@@ -1086,8 +1132,8 @@ _check_alias_and_seq(unsigned int cp, Py_UCS4* code, int with_named_seq)
     /* check if named sequences are allowed */
     if (!with_named_seq && IS_NAMED_SEQ(cp))
         return 0;
-    /* if the codepoint is in the PUA range that we use for aliases,
-     * convert it to obtain the right codepoint */
+    /* if the code point is in the PUA range that we use for aliases,
+     * convert it to obtain the right code point */
     if (IS_ALIAS(cp))
         *code = name_aliases[cp-aliases_start];
     else
@@ -1099,9 +1145,9 @@ static int
 _getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code,
          int with_named_seq)
 {
-    /* Return the codepoint associated with the given name.
+    /* Return the code point associated with the given name.
      * Named aliases are resolved too (unless self != NULL (i.e. we are using
-     * 3.2.0)).  If with_named_seq is 1, returns the PUA codepoint that we are
+     * 3.2.0)).  If with_named_seq is 1, returns the PUA code point that we are
      * using for the named sequence, and the caller must then convert it. */
     unsigned int h, v;
     unsigned int mask = code_size-1;
@@ -1234,12 +1280,16 @@ unicodedata_lookup(PyObject* self, PyObject* args)
     Py_UCS4 code;
 
     char* name;
-    int namelen;
+    Py_ssize_t namelen;
     unsigned int index;
     if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen))
         return NULL;
+    if (namelen > INT_MAX) {
+        PyErr_SetString(PyExc_KeyError, "name too long");
+        return NULL;
+    }
 
-    if (!_getcode(self, name, namelen, &code, 1)) {
+    if (!_getcode(self, name, (int)namelen, &code, 1)) {
         PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name);
         return NULL;
     }
@@ -1257,7 +1307,7 @@ unicodedata_lookup(PyObject* self, PyObject* args)
 /* XXX Add doc strings. */
 
 static PyMethodDef unicodedata_functions[] = {
-    {"decimal", unicodedata_decimal, METH_VARARGS, unicodedata_decimal__doc__},
+    UNICODEDATA_UCD_DECIMAL_METHODDEF
     {"digit", unicodedata_digit, METH_VARARGS, unicodedata_digit__doc__},
     {"numeric", unicodedata_numeric, METH_VARARGS, unicodedata_numeric__doc__},
     {"category", unicodedata_category, METH_VARARGS,
@@ -1334,7 +1384,6 @@ this database is based on the UnicodeData.txt file version\n\
 The module uses the same names and symbols as defined by the\n\
 UnicodeData File Format " UNIDATA_VERSION ".");
 
-
 static struct PyModuleDef unicodedatamodule = {
         PyModuleDef_HEAD_INIT,
         "unicodedata",
author	Benjamin Peterson <benjamin@python.org>	2015-03-02 11:18:40 -0500
committer	Benjamin Peterson <benjamin@python.org>	2015-03-02 11:18:40 -0500
commit	d6407b4e5f83ab386a1722de125f0fdc0d181d94 (patch)
tree	ee24f62a6af8962ebb1c11b102afee10ba34010c /Modules/unicodedata.c
parent	a7f410fcb001feedbe9d66a2b67fc9cae810ca63 (diff)
parent	004c700927e153000f8a0931d304ac602956a50f (diff)
download	cpython-d6407b4e5f83ab386a1722de125f0fdc0d181d94.tar.gz