From cf81c5ce3d5b91f342c65bdce7b8df7d47b7ab3e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 18 Jun 2016 16:48:07 +0300 Subject: Issue #27177: Match objects in the re module now support index-like objects as group indices. Based on patches by Jeroen Demeyer and Xiang Zhang. --- Modules/_sre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index fb0ab033c5..d379363729 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2049,8 +2049,9 @@ match_getindex(MatchObject* self, PyObject* index) /* Default value */ return 0; - if (PyLong_Check(index)) - return PyLong_AsSsize_t(index); + if (PyIndex_Check(index)) { + return PyNumber_AsSsize_t(index, NULL); + } i = -1; -- cgit v1.2.1 From 201647d227de4a4d8e2830c081567716ee92e8d3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 20 Aug 2016 01:38:00 +0200 Subject: pattern_subx() now uses fast call Issue #27128. --- Modules/_sre.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index 6f8ec0e538..3e8d7f8b48 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1056,7 +1056,6 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, PyObject* joiner; PyObject* item; PyObject* filter; - PyObject* args; PyObject* match; void* ptr; Py_ssize_t status; @@ -1158,13 +1157,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, match = pattern_new_match(self, &state, 1); if (!match) goto error; - args = PyTuple_Pack(1, match); - if (!args) { - Py_DECREF(match); - goto error; - } - item = PyObject_CallObject(filter, args); - Py_DECREF(args); + item = _PyObject_FastCall(filter, &match, 1, NULL); Py_DECREF(match); if (!item) goto error; -- cgit v1.2.1 From de03c6d8066496680a1ea99ff9e67e28852b0007 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Aug 2016 22:48:54 +0200 Subject: Rename 
_PyObject_FastCall() to _PyObject_FastCallDict() Issue #27809: * Rename _PyObject_FastCall() function to _PyObject_FastCallDict() * Add _PyObject_FastCall(), _PyObject_CallNoArg() and _PyObject_CallArg1() macros calling _PyObject_FastCallDict() --- Modules/_sre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index 3e8d7f8b48..0a62f62dc6 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1157,7 +1157,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, match = pattern_new_match(self, &state, 1); if (!match) goto error; - item = _PyObject_FastCall(filter, &match, 1, NULL); + item = _PyObject_CallArg1(filter, match); Py_DECREF(match); if (!item) goto error; -- cgit v1.2.1 From 82b75eb31160ec9c025e450ac9fa3300d560a245 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 6 Sep 2016 13:47:26 -0700 Subject: replace Py_(u)intptr_t with the c99 standard types --- Modules/_sre.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index 0a62f62dc6..afa90999ac 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1582,7 +1582,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, skip = *code; \ VTRACE(("%lu (skip to %p)\n", \ (unsigned long)skip, code+skip)); \ - if (skip-adj > (Py_uintptr_t)(end - code)) \ + if (skip-adj > (uintptr_t)(end - code)) \ FAIL; \ code++; \ } while (0) @@ -1616,7 +1616,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end) case SRE_OP_CHARSET: offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ - if (offset > (Py_uintptr_t)(end - code)) + if (offset > (uintptr_t)(end - code)) FAIL; code += offset; break; @@ -1624,7 +1624,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end) case SRE_OP_BIGCHARSET: GET_ARG; /* Number of blocks */ offset = 256/sizeof(SRE_CODE); /* 256-byte table */ - if (offset > (Py_uintptr_t)(end - code)) + if (offset 
> (uintptr_t)(end - code)) FAIL; /* Make sure that each byte points to a valid block */ for (i = 0; i < 256; i++) { @@ -1633,7 +1633,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end) } code += offset; offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ - if (offset > (Py_uintptr_t)(end - code)) + if (offset > (uintptr_t)(end - code)) FAIL; code += offset; break; @@ -1784,11 +1784,11 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) GET_ARG; prefix_len = arg; GET_ARG; /* Here comes the prefix string */ - if (prefix_len > (Py_uintptr_t)(newcode - code)) + if (prefix_len > (uintptr_t)(newcode - code)) FAIL; code += prefix_len; /* And here comes the overlap table */ - if (prefix_len > (Py_uintptr_t)(newcode - code)) + if (prefix_len > (uintptr_t)(newcode - code)) FAIL; /* Each overlap value should be < prefix_len */ for (i = 0; i < prefix_len; i++) { @@ -1917,7 +1917,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) to allow arbitrary jumps anywhere in the code; so we just look for a JUMP opcode preceding our skip target. */ - if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) && + if (skip >= 3 && skip-3 < (uintptr_t)(end - code) && code[skip-3] == SRE_OP_JUMP) { VTRACE(("both then and else parts present\n")); -- cgit v1.2.1 From e45f1d22ce9cb300fa52ba1037a83efa5dc90b3d Mon Sep 17 00:00:00 2001 From: "Eric V. 
Smith" Date: Sun, 11 Sep 2016 08:55:43 -0400 Subject: Issue 24454: Improve the usability of the re match object named group API --- Modules/_sre.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index afa90999ac..e4372bedb6 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2121,6 +2121,12 @@ match_group(MatchObject* self, PyObject* args) return result; } +static PyObject* +match_getitem(MatchObject* self, PyObject* name) +{ + return match_getslice(self, name, Py_None); +} + /*[clinic input] _sre.SRE_Match.groups @@ -2416,6 +2422,9 @@ PyDoc_STRVAR(match_group_doc, Return subgroup(s) of the match by indices or names.\n\ For 0 returns the entire match."); +PyDoc_STRVAR(match_getitem_doc, +"__getitem__(name) <==> group(name).\n"); + static PyObject * match_lastindex_get(MatchObject *self) { @@ -2706,6 +2715,13 @@ static PyTypeObject Pattern_Type = { pattern_getset, /* tp_getset */ }; +/* Match objects do not support length or assignment, but do support + __getitem__. */ +static PyMappingMethods match_as_mapping = { + NULL, + (binaryfunc)match_getitem, + NULL +}; static PyMethodDef match_methods[] = { {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, @@ -2717,6 +2733,7 @@ static PyMethodDef match_methods[] = { _SRE_SRE_MATCH_EXPAND_METHODDEF _SRE_SRE_MATCH___COPY___METHODDEF _SRE_SRE_MATCH___DEEPCOPY___METHODDEF + {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST, match_getitem_doc}, {NULL, NULL} }; @@ -2751,7 +2768,7 @@ static PyTypeObject Match_Type = { (reprfunc)match_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ + &match_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ -- cgit v1.2.1 From d6b9d0686ef3554c80161789b37b4dcfa4528b23 Mon Sep 17 00:00:00 2001 From: "Eric V. 
Smith" Date: Sun, 11 Sep 2016 09:50:47 -0400 Subject: Issue 24454: Added whatsnew entry, removed __getitem__ from match_methods. Thanks Serhiy Storchaka. --- Modules/_sre.c | 1 - 1 file changed, 1 deletion(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index e4372bedb6..a25d935a20 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2733,7 +2733,6 @@ static PyMethodDef match_methods[] = { _SRE_SRE_MATCH_EXPAND_METHODDEF _SRE_SRE_MATCH___COPY___METHODDEF _SRE_SRE_MATCH___DEEPCOPY___METHODDEF - {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST, match_getitem_doc}, {NULL, NULL} }; -- cgit v1.2.1 From 6ff17c29ca92c2ce3d90e312866f969d69da2768 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Sun, 11 Sep 2016 10:20:27 -0400 Subject: Issue 24454: Removed unused match_getitem_doc. --- Modules/_sre.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index a25d935a20..69c7bc0de6 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2422,9 +2422,6 @@ PyDoc_STRVAR(match_group_doc, Return subgroup(s) of the match by indices or names.\n\ For 0 returns the entire match."); -PyDoc_STRVAR(match_getitem_doc, -"__getitem__(name) <==> group(name).\n"); - static PyObject * match_lastindex_get(MatchObject *self) { -- cgit v1.2.1 From 3481b2443d72c41a7356937952f69d647284a3c0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 21 Nov 2016 16:35:08 +0100 Subject: Implement rich comparison for _sre.SRE_Pattern Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by re.compile(), become comparable (only x==y and x!=y operators). This change should fix the issue #18383: don't duplicate warning filters when the warnings module is reloaded (thing usually only done in unit tests). 
--- Modules/_sre.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 6 deletions(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index 69c7bc0de6..c1e9fa6e6b 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, self->groups = groups; - Py_XINCREF(groupindex); + Py_INCREF(groupindex); self->groupindex = groupindex; - Py_XINCREF(indexgroup); + Py_INCREF(indexgroup); self->indexgroup = indexgroup; - self->weakreflist = NULL; - if (!_validate(self)) { Py_DECREF(self); return NULL; @@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_ return (PyObject*) scanner; } +static Py_hash_t +pattern_hash(PatternObject *self) +{ + Py_hash_t hash, hash2; + + hash = PyObject_Hash(self->pattern); + if (hash == -1) { + return -1; + } + + hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); + hash ^= hash2; + + hash ^= self->flags; + hash ^= self->isbytes; + hash ^= self->codesize; + + if (hash == -1) { + hash = -2; + } + return hash; +} + +static PyObject* +pattern_richcompare(PyObject *lefto, PyObject *righto, int op) +{ + PatternObject *left, *right; + int cmp; + + if (op != Py_EQ && op != Py_NE) { + Py_RETURN_NOTIMPLEMENTED; + } + + if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) { + Py_RETURN_NOTIMPLEMENTED; + } + left = (PatternObject *)lefto; + right = (PatternObject *)righto; + + cmp = (left->flags == right->flags + && left->isbytes == right->isbytes + && left->codesize && right->codesize); + if (cmp) { + /* Compare the code and the pattern because the same pattern can + produce different codes depending on the locale used to compile the + pattern when the re.LOCALE flag is used. Don't compare groups, + indexgroup nor groupindex: they are derivated from the pattern. 
*/ + cmp = (memcmp(left->code, right->code, + sizeof(left->code[0]) * left->codesize) == 0); + } + if (cmp) { + cmp = PyObject_RichCompareBool(left->pattern, right->pattern, + Py_EQ); + if (cmp < 0) { + return NULL; + } + } + if (op == Py_NE) { + cmp = !cmp; + } + return PyBool_FromLong(cmp); +} + #include "clinic/_sre.c.h" static PyMethodDef pattern_methods[] = { @@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = { 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - 0, /* tp_hash */ + (hashfunc)pattern_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ @@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = { pattern_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ + pattern_richcompare, /* tp_richcompare */ offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ -- cgit v1.2.1 From 2970b7895c51700cc335e59e7b4a141cc9b35a75 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2016 15:23:00 +0100 Subject: Issue #28727: Fix typo in pattern_richcompare() Typo caught by Serhiy Storchaka, thanks! --- Modules/_sre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index c1e9fa6e6b..1b7741696d 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2688,7 +2688,7 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) cmp = (left->flags == right->flags && left->isbytes == right->isbytes - && left->codesize && right->codesize); + && left->codesize == right->codesize); if (cmp) { /* Compare the code and the pattern because the same pattern can produce different codes depending on the locale used to compile the -- cgit v1.2.1 From 4d83386bceca38593641c1921fb175a541fb88c2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Nov 2016 15:30:38 +0100 Subject: Issue #28727: Optimize pattern_richcompare() for a==a A pattern is equal to itself. 
--- Modules/_sre.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Modules/_sre.c') diff --git a/Modules/_sre.c b/Modules/_sre.c index 1b7741696d..979e61fb53 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2683,6 +2683,12 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) { Py_RETURN_NOTIMPLEMENTED; } + + if (lefto == righto) { + /* a pattern is equal to itself */ + return PyBool_FromLong(op == Py_EQ); + } + left = (PatternObject *)lefto; right = (PatternObject *)righto; -- cgit v1.2.1