summaryrefslogtreecommitdiff
path: root/Modules/_sre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r--Modules/_sre.c129
1 files changed, 100 insertions, 29 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 4b376ec078..d09249672f 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -35,7 +35,7 @@
* other compatibility work.
*/
-static char copyright[] =
+static const char copyright[] =
" SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
#define PY_SSIZE_T_CLEAN
@@ -62,9 +62,6 @@ static char copyright[] =
/* -------------------------------------------------------------------- */
/* optional features */
-/* enables fast searching */
-#define USE_FAST_SEARCH
-
/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY
@@ -717,7 +714,7 @@ _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
}
static PyObject*
-call(char* module, char* function, PyObject* args)
+call(const char* module, const char* function, PyObject* args)
{
PyObject* name;
PyObject* mod;
@@ -1059,7 +1056,6 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
PyObject* joiner;
PyObject* item;
PyObject* filter;
- PyObject* args;
PyObject* match;
void* ptr;
Py_ssize_t status;
@@ -1161,13 +1157,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
match = pattern_new_match(self, &state, 1);
if (!match)
goto error;
- args = PyTuple_Pack(1, match);
- if (!args) {
- Py_DECREF(match);
- goto error;
- }
- item = PyObject_CallObject(filter, args);
- Py_DECREF(args);
+ item = _PyObject_CallArg1(filter, match);
Py_DECREF(match);
if (!item)
goto error;
@@ -1516,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->groups = groups;
- Py_XINCREF(groupindex);
+ Py_INCREF(groupindex);
self->groupindex = groupindex;
- Py_XINCREF(indexgroup);
+ Py_INCREF(indexgroup);
self->indexgroup = indexgroup;
- self->weakreflist = NULL;
-
if (!_validate(self)) {
Py_DECREF(self);
return NULL;
@@ -1592,7 +1580,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
- if (skip-adj > (Py_uintptr_t)(end - code)) \
+ if (skip-adj > (uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
@@ -1626,7 +1614,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
case SRE_OP_CHARSET:
offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
- if (offset > (Py_uintptr_t)(end - code))
+ if (offset > (uintptr_t)(end - code))
FAIL;
code += offset;
break;
@@ -1634,7 +1622,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
case SRE_OP_BIGCHARSET:
GET_ARG; /* Number of blocks */
offset = 256/sizeof(SRE_CODE); /* 256-byte table */
- if (offset > (Py_uintptr_t)(end - code))
+ if (offset > (uintptr_t)(end - code))
FAIL;
/* Make sure that each byte points to a valid block */
for (i = 0; i < 256; i++) {
@@ -1643,7 +1631,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
code += offset;
offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
- if (offset > (Py_uintptr_t)(end - code))
+ if (offset > (uintptr_t)(end - code))
FAIL;
code += offset;
break;
@@ -1794,11 +1782,11 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; prefix_len = arg;
GET_ARG;
/* Here comes the prefix string */
- if (prefix_len > (Py_uintptr_t)(newcode - code))
+ if (prefix_len > (uintptr_t)(newcode - code))
FAIL;
code += prefix_len;
/* And here comes the overlap table */
- if (prefix_len > (Py_uintptr_t)(newcode - code))
+ if (prefix_len > (uintptr_t)(newcode - code))
FAIL;
/* Each overlap value should be < prefix_len */
for (i = 0; i < prefix_len; i++) {
@@ -1927,7 +1915,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
- if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
+ if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
@@ -2057,8 +2045,9 @@ match_getindex(MatchObject* self, PyObject* index)
/* Default value */
return 0;
- if (PyLong_Check(index))
- return PyLong_AsSsize_t(index);
+ if (PyIndex_Check(index)) {
+ return PyNumber_AsSsize_t(index, NULL);
+ }
i = -1;
@@ -2135,6 +2124,12 @@ match_group(MatchObject* self, PyObject* args)
return result;
}
+static PyObject*
+match_getitem(MatchObject* self, PyObject* name) /* subscript handler installed in match_as_mapping: implements match[name] */
+{
+ return match_getslice(self, name, Py_None); /* NOTE(review): Py_None is presumably the default value for a group that did not match -- confirm against match_getslice */
+}
+
/*[clinic input]
_sre.SRE_Match.groups
@@ -2657,6 +2652,75 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
return (PyObject*) scanner;
}
+static Py_hash_t
+pattern_hash(PatternObject *self) /* tp_hash for Pattern_Type: mixes pattern, compiled code, flags, isbytes and codesize */
+{
+ Py_hash_t hash, hash2;
+
+ hash = PyObject_Hash(self->pattern);
+ if (hash == -1) {
+ return -1; /* hashing the pattern object failed; propagate the error */
+ }
+
+ hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); /* hash the raw bytes of the compiled code array */
+ hash ^= hash2;
+
+ hash ^= self->flags;
+ hash ^= self->isbytes;
+ hash ^= self->codesize;
+
+ if (hash == -1) {
+ hash = -2; /* -1 is the error return of a hash function, so a genuine -1 is remapped */
+ }
+ return hash;
+}
+
+static PyObject*
+pattern_richcompare(PyObject *lefto, PyObject *righto, int op) /* tp_richcompare for Pattern_Type: equality and inequality only */
+{
+ PatternObject *left, *right;
+ int cmp;
+
+ if (op != Py_EQ && op != Py_NE) {
+ Py_RETURN_NOTIMPLEMENTED; /* ordering comparisons are not supported */
+ }
+
+ if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
+ Py_RETURN_NOTIMPLEMENTED; /* both operands must be exactly Pattern_Type */
+ }
+
+ if (lefto == righto) {
+ /* identity shortcut: a pattern is always equal to itself */
+ return PyBool_FromLong(op == Py_EQ);
+ }
+
+ left = (PatternObject *)lefto;
+ right = (PatternObject *)righto;
+
+ cmp = (left->flags == right->flags
+ && left->isbytes == right->isbytes
+ && left->codesize == right->codesize); /* scalar fields first; code and pattern are compared only if these match */
+ if (cmp) {
+ /* Compare the code and the pattern because the same pattern can
+ produce different codes depending on the locale used to compile the
+ pattern when the re.LOCALE flag is used. Don't compare groups,
+ indexgroup or groupindex: they are derived from the pattern. */
+ cmp = (memcmp(left->code, right->code,
+ sizeof(left->code[0]) * left->codesize) == 0);
+ }
+ if (cmp) {
+ cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
+ Py_EQ);
+ if (cmp < 0) {
+ return NULL; /* comparing the pattern objects failed; propagate the error */
+ }
+ }
+ if (op == Py_NE) {
+ cmp = !cmp; /* cmp holds the equality result; invert it for != */
+ }
+ return PyBool_FromLong(cmp);
+}
+
#include "clinic/_sre.c.h"
static PyMethodDef pattern_methods[] = {
@@ -2701,7 +2765,7 @@ static PyTypeObject Pattern_Type = {
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
- 0, /* tp_hash */
+ (hashfunc)pattern_hash, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
@@ -2711,7 +2775,7 @@ static PyTypeObject Pattern_Type = {
pattern_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
- 0, /* tp_richcompare */
+ pattern_richcompare, /* tp_richcompare */
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
@@ -2720,6 +2784,13 @@ static PyTypeObject Pattern_Type = {
pattern_getset, /* tp_getset */
};
+/* Mapping protocol for Match objects: only subscripting (__getitem__) is
+ provided; length and item assignment are not supported. */
+static PyMappingMethods match_as_mapping = {
+ NULL, /* mp_length */
+ (binaryfunc)match_getitem, /* mp_subscript */
+ NULL /* mp_ass_subscript */
+};
static PyMethodDef match_methods[] = {
{"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
@@ -2765,7 +2836,7 @@ static PyTypeObject Match_Type = {
(reprfunc)match_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
+ &match_as_mapping, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */