summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2016-12-26 19:47:48 +0100
committer: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2016-12-29 22:11:58 +0100
commit: 3295beb7774098659a40649d65e84f7ae9a4838e (patch)
tree: 9c62fb63d36d4d7bd9f47936f7410032341a642f
parent: dfe547856ee946163dfdc695723f7ab67865228b (diff)
download: psycopg2-3295beb7774098659a40649d65e84f7ae9a4838e.tar.gz
Don't look up the Python encoding
Store the encode/decode functions for the right codec in the connection. The Python encoding name has been dropped from the connection to avoid the temptation to use it...
-rw-r--r-- psycopg/adapter_qstring.c 39
-rw-r--r-- psycopg/connection.h 5
-rw-r--r-- psycopg/connection_int.c 210
-rw-r--r-- psycopg/connection_type.c 1
-rw-r--r-- psycopg/cursor_type.c 6
-rw-r--r-- psycopg/error.h 2
-rw-r--r-- psycopg/error_type.c 15
-rw-r--r-- psycopg/lobject_type.c 6
-rw-r--r-- psycopg/microprotocols.c 4
-rw-r--r-- psycopg/pqpath.c 37
-rw-r--r-- psycopg/psycopg.h 2
-rw-r--r-- psycopg/typecast.c 3
-rw-r--r-- psycopg/typecast_basic.c 7
-rw-r--r-- psycopg/utils.c 54
14 files changed, 231 insertions, 160 deletions
diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c
index febb49a..73579c5 100644
--- a/psycopg/adapter_qstring.c
+++ b/psycopg/adapter_qstring.c
@@ -36,20 +36,6 @@ static const char *default_encoding = "latin1";
/* qstring_quote - do the quote process on plain and unicode strings */
-const char *
-_qstring_get_encoding(qstringObject *self)
-{
- /* if the wrapped object is an unicode object we can encode it to match
- conn->encoding but if the encoding is not specified we don't know what
- to do and we raise an exception */
- if (self->conn) {
- return self->conn->pyenc;
- }
- else {
- return self->encoding ? self->encoding : default_encoding;
- }
-}
-
static PyObject *
qstring_quote(qstringObject *self)
{
@@ -59,19 +45,15 @@ qstring_quote(qstringObject *self)
const char *encoding;
PyObject *rv = NULL;
- encoding = _qstring_get_encoding(self);
- Dprintf("qstring_quote: encoding to %s", encoding);
-
if (PyUnicode_Check(self->wrapped)) {
- if (encoding) {
- str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL);
- Dprintf("qstring_quote: got encoded object at %p", str);
- if (str == NULL) goto exit;
+ if (self->conn) {
+ if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; }
}
else {
- PyErr_SetString(PyExc_TypeError,
- "missing encoding to encode unicode object");
- goto exit;
+ encoding = self->encoding ? self->encoding : default_encoding;
+ if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) {
+ goto exit;
+ }
}
}
@@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args)
static PyObject *
qstring_get_encoding(qstringObject *self)
{
- const char *encoding;
- encoding = _qstring_get_encoding(self);
- return Text_FromUTF8(encoding);
+ if (self->conn) {
+ return conn_pgenc_to_pyenc(self->conn->encoding, NULL);
+ }
+ else {
+ return Text_FromUTF8(self->encoding ? self->encoding : default_encoding);
+ }
}
static int
diff --git a/psycopg/connection.h b/psycopg/connection.h
index 32b34fa..6c5a5f6 100644
--- a/psycopg/connection.h
+++ b/psycopg/connection.h
@@ -83,8 +83,6 @@ struct connectionObject {
char *dsn; /* data source name */
char *critical; /* critical error on this connection */
char *encoding; /* current backend encoding */
- /* TODO: drop */
- char *pyenc; /* connection encoding python name */
long int closed; /* 1 means connection has been closed;
2 that something horrible happened */
@@ -139,7 +137,10 @@ typedef struct {
/* C-callable functions in connection_int.c and connection_ext.c */
HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str);
+HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b);
+HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len);
HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn);
+HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding);
RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self);
HIDDEN int conn_get_protocol_version(PGconn *pgconn);
HIDDEN int conn_get_server_version(PGconn *pgconn);
diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c
index 83b706b..38688d3 100644
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = {
PyObject *
conn_text_from_chars(connectionObject *self, const char *str)
{
-#if PY_MAJOR_VERSION < 3
- return PyString_FromString(str);
-#else
- const char *pyenc = self ? self->pyenc : "ascii";
- return PyUnicode_Decode(str, strlen(str), pyenc, "replace");
-#endif
+ return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL);
+}
+
+
+/* Encode a unicode object into a bytes object in the connection encoding.
+ *
+ * If no connection or encoding is available, default to utf8
+ */
+PyObject *
+conn_encode(connectionObject *self, PyObject *u)
+{
+ PyObject *t = NULL;
+ PyObject *rv = NULL;
+
+ if (!(self && self->pyencoder)) {
+ rv = PyUnicode_AsUTF8String(u);
+ goto exit;
+ }
+
+ if (!(t = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL))) {
+ goto exit;
+ }
+
+ if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; }
+ Py_INCREF(rv);
+
+exit:
+ Py_XDECREF(t);
+
+ return rv;
+}
+
+
+/* decode a c string into a Python unicode in the connection encoding
+ *
+ * len can be < 0: in this case it will be calculated
+ *
+ * If no connection is available, default to utf8. A connection that has
+ * neither a C nor a Python decoder set yields NULL with no exception set.
+ */
+PyObject *
+conn_decode(connectionObject *self, const char *str, Py_ssize_t len)
+{
+ PyObject *b = NULL;
+ PyObject *t = NULL;
+ PyObject *rv = NULL;
+
+ if (len < 0) { len = strlen(str); }
+
+ if (self) {
+ if (self->cdecoder) {
+ return self->cdecoder(str, len, NULL);
+ }
+ else if (self->pydecoder) {
+ if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; }
+ if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) {
+ goto exit;
+ }
+ rv = PyTuple_GetItem(t, 0);
+ Py_XINCREF(rv);
+ }
+ }
+ else {
+ return PyUnicode_FromStringAndSize(str, len);
+ }
+
+exit:
+ Py_XDECREF(t);
+ Py_XDECREF(b);
+ return rv;
}
/* conn_notice_callback - process notices */
@@ -321,61 +384,20 @@ exit:
return rv;
}
-/* Convert a PostgreSQL encoding name to a Python encoding name.
- *
- * Set 'pyenc' to a new copy of the encoding name allocated on the Python heap.
- * Return 0 in case of success, else -1 and set an exception.
- *
- * 'pgenc' should be already normalized (uppercase, no - or _).
- */
-RAISES_NEG static int
-conn_pgenc_to_pyenc(const char *pgenc, char **pyenc)
-{
- char *tmp;
- Py_ssize_t size;
- PyObject *opyenc = NULL;
- int rv = -1;
-
- /* Find the Py encoding name from the PG encoding */
- if (!(opyenc = PyDict_GetItemString(psycoEncodings, pgenc))) {
- PyErr_Format(OperationalError,
- "no Python encoding for PostgreSQL encoding '%s'", pgenc);
- goto exit;
- }
-
- /* Convert the encoding in a bytes string to extract the c string. */
- Py_INCREF(opyenc);
- if (!(opyenc = psycopg_ensure_bytes(opyenc))) {
- goto exit;
- }
-
- if (-1 == Bytes_AsStringAndSize(opyenc, &tmp, &size)) {
- goto exit;
- }
-
- /* have our own copy of the python encoding name */
- rv = psycopg_strdup(pyenc, tmp, size);
-
-exit:
- Py_XDECREF(opyenc);
- return rv;
-}
-
-
/* set fast access functions according to the currently selected encoding
*/
static void
conn_set_fast_codec(connectionObject *self)
{
- Dprintf("conn_set_fast_codec: encoding=%s", self->pyenc);
+ Dprintf("conn_set_fast_codec: encoding=%s", self->encoding);
- if (0 == strcmp(self->pyenc, "utf_8")) {
+ if (0 == strcmp(self->encoding, "UTF8")) {
Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8");
self->cdecoder = PyUnicode_DecodeUTF8;
return;
}
- if (0 == strcmp(self->pyenc, "iso8859_1")) {
+ if (0 == strcmp(self->encoding, "LATIN1")) {
Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1");
self->cdecoder = PyUnicode_DecodeLatin1;
return;
@@ -386,12 +408,45 @@ conn_set_fast_codec(connectionObject *self)
}
+/* Return the Python encoding from a PostgreSQL encoding.
+ *
+ * Optionally return the clean version of the postgres encoding too
+ */
+PyObject *
+conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding)
+{
+ char *pgenc = NULL;
+ PyObject *rv = NULL;
+
+ if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
+ if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) {
+ PyErr_Format(OperationalError,
+ "no Python encoding for PostgreSQL encoding '%s'", pgenc);
+ goto exit;
+ }
+ Py_INCREF(rv);
+
+ if (clean_encoding) {
+ *clean_encoding = pgenc;
+ }
+ else {
+ PyMem_Free(pgenc);
+ }
+
+exit:
+ return rv;
+}
+
/* Convert a Postgres encoding into Python encoding and decoding functions.
*
+ * Set clean_encoding to a clean version of the Postgres encoding name
+ * and pyenc and pydec to python codec functions.
+ *
* Return 0 on success, else -1 and set an exception.
*/
RAISES_NEG static int
-conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
+conn_get_python_codec(const char *encoding,
+ char **clean_encoding, PyObject **pyenc, PyObject **pydec)
{
int rv = -1;
char *pgenc = NULL;
@@ -399,15 +454,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
PyObject *m = NULL, *f = NULL, *codec = NULL;
PyObject *enc_tmp = NULL, *dec_tmp = NULL;
- if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
-
- /* Find the Py encoding name from the PG encoding */
- if (!(encname = PyDict_GetItemString(psycoEncodings, pgenc))) {
- PyErr_Format(OperationalError,
- "no Python encoding for PostgreSQL encoding '%s'", pgenc);
- goto exit;
- }
- Py_INCREF(encname);
+ if (!(encname = conn_pgenc_to_pyenc(encoding, &pgenc))) { goto exit; }
/* Look up the python codec */
if (!(m = PyImport_ImportModule("codecs"))) { goto exit; }
@@ -419,6 +466,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
/* success */
*pyenc = enc_tmp; enc_tmp = NULL;
*pydec = dec_tmp; dec_tmp = NULL;
+ *clean_encoding = pgenc; pgenc = NULL;
rv = 0;
exit:
@@ -440,20 +488,17 @@ exit:
* Return 0 on success, else -1 and set an exception.
*/
RAISES_NEG static int
-conn_set_encoding(connectionObject *self, const char *encoding)
+conn_store_encoding(connectionObject *self, const char *encoding)
{
int rv = -1;
- char *pgenc = NULL, *pyenc = NULL;
+ char *pgenc = NULL;
PyObject *enc_tmp = NULL, *dec_tmp = NULL;
- if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } /* TODO: drop */
-
- /* Look for this encoding in Python codecs. */
- if (0 > conn_pgenc_to_pyenc(pgenc, &pyenc)) { goto exit; } /* TODO: drop */
-
- if (0 > conn_get_python_codec(encoding, &enc_tmp, &dec_tmp)) { goto exit; }
+ if (0 > conn_get_python_codec(encoding, &pgenc, &enc_tmp, &dec_tmp)) {
+ goto exit;
+ }
- /* Good, success: store the encoding/pyenc in the connection. */
+ /* Good, success: store the encoding/codec in the connection. */
{
char *tmp = self->encoding;
self->encoding = pgenc;
@@ -461,14 +506,6 @@ conn_set_encoding(connectionObject *self, const char *encoding)
pgenc = NULL;
}
- {
- /* TODO: drop */
- char *tmp = self->pyenc;
- self->pyenc = pyenc;
- PyMem_Free(tmp);
- pyenc = NULL;
- }
-
Py_CLEAR(self->pyencoder);
self->pyencoder = enc_tmp;
enc_tmp = NULL;
@@ -485,7 +522,6 @@ exit:
Py_XDECREF(enc_tmp);
Py_XDECREF(dec_tmp);
PyMem_Free(pgenc);
- PyMem_Free(pyenc);
return rv;
}
@@ -508,7 +544,7 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
goto exit;
}
- if (0 > conn_set_encoding(self, encoding)) {
+ if (0 > conn_store_encoding(self, encoding)) {
goto exit;
}
@@ -1338,16 +1374,14 @@ conn_set_client_encoding(connectionObject *self, const char *pgenc)
PGresult *pgres = NULL;
char *error = NULL;
int res = -1;
- char *pyenc = NULL;
char *clean_enc = NULL;
- /* If the current encoding is equal to the requested one we don't
- issue any query to the backend */
- if (strcmp(self->encoding, pgenc) == 0) return 0;
-
/* We must know what python encoding this encoding is. */
if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; }
- if (0 > conn_pgenc_to_pyenc(clean_enc, &pyenc)) { goto exit; }
+
+ /* If the current encoding is equal to the requested one we don't
+ issue any query to the backend */
+ if (strcmp(self->encoding, clean_enc) == 0) return 0;
Py_BEGIN_ALLOW_THREADS;
pthread_mutex_lock(&self->lock);
@@ -1372,14 +1406,12 @@ endlock:
goto exit;
}
- res = conn_set_encoding(self, pgenc);
+ res = conn_store_encoding(self, pgenc);
- Dprintf("conn_set_client_encoding: set encoding to %s (Python: %s)",
- self->encoding, self->pyenc);
+ Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding);
exit:
PyMem_Free(clean_enc);
- PyMem_Free(pyenc);
return res;
}
diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c
index ba4e433..7401bc1 100644
--- a/psycopg/connection_type.c
+++ b/psycopg/connection_type.c
@@ -1167,7 +1167,6 @@ connection_dealloc(PyObject* obj)
PyMem_Free(self->dsn);
PyMem_Free(self->encoding);
- PyMem_Free(self->pyenc);
if (self->critical) free(self->critical);
if (self->cancel) PQfreeCancel(self->cancel);
diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c
index c580daa..a7303c6 100644
--- a/psycopg/cursor_type.c
+++ b/psycopg/cursor_type.c
@@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic(
Py_INCREF(sql);
}
else if (PyUnicode_Check(sql)) {
- char *enc = self->conn->pyenc;
- sql = PyUnicode_AsEncodedString(sql, enc, NULL);
- /* if there was an error during the encoding from unicode to the
- target encoding, we just let the exception propagate */
- if (sql == NULL) { goto fail; }
+ if (!(sql = conn_encode(self->conn, sql))) { goto fail; }
}
else {
/* the is not unicode or string, raise an error */
diff --git a/psycopg/error.h b/psycopg/error.h
index 8bc4df5..275a7ce 100644
--- a/psycopg/error.h
+++ b/psycopg/error.h
@@ -34,7 +34,7 @@ typedef struct {
PyObject *pgerror;
PyObject *pgcode;
cursorObject *cursor;
- char *pyenc;
+ PyObject *pydecoder;
PGresult *pgres;
} errorObject;
diff --git a/psycopg/error_type.c b/psycopg/error_type.c
index 40b71aa..4ab2191 100644
--- a/psycopg/error_type.c
+++ b/psycopg/error_type.c
@@ -34,17 +34,7 @@
PyObject *
error_text_from_chars(errorObject *self, const char *str)
{
- if (str == NULL) {
- Py_INCREF(Py_None);
- return (Py_None);
- }
-
-#if PY_MAJOR_VERSION < 3
- return PyString_FromString(str);
-#else
- return PyUnicode_Decode(str, strlen(str),
- self->pyenc ? self->pyenc : "ascii", "replace");
-#endif
+ return psycopg_text_from_chars_safe(str, -1, self->pydecoder);
}
@@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg)
Py_VISIT(self->pgerror);
Py_VISIT(self->pgcode);
Py_VISIT(self->cursor);
+ Py_VISIT(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_traverse(
(PyObject *)self, visit, arg);
@@ -104,6 +95,7 @@ error_clear(errorObject *self)
Py_CLEAR(self->pgerror);
Py_CLEAR(self->pgcode);
Py_CLEAR(self->cursor);
+ Py_CLEAR(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self);
}
@@ -113,7 +105,6 @@ error_dealloc(errorObject *self)
{
PyObject_GC_UnTrack((PyObject *)self);
error_clear(self);
- PyMem_Free(self->pyenc);
CLEARPGRES(self->pgres);
Py_TYPE(self)->tp_free((PyObject *)self);
diff --git a/psycopg/lobject_type.c b/psycopg/lobject_type.c
index 61c9232..54f3a4b 100644
--- a/psycopg/lobject_type.c
+++ b/psycopg/lobject_type.c
@@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args)
data = obj;
}
else if (PyUnicode_Check(obj)) {
- if (!(data = PyUnicode_AsEncodedString(obj, self->conn->pyenc, NULL))) {
- goto exit;
- }
+ if (!(data = conn_encode(self->conn, obj))) { goto exit; }
}
else {
PyErr_Format(PyExc_TypeError,
@@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args)
if (self->mode & LOBJECT_BINARY) {
res = Bytes_FromStringAndSize(buffer, size);
} else {
- res = PyUnicode_Decode(buffer, size, self->conn->pyenc, NULL);
+ res = conn_decode(self->conn, buffer, size);
}
PyMem_Free(buffer);
diff --git a/psycopg/microprotocols.c b/psycopg/microprotocols.c
index 7bd3374..3ddcc48 100644
--- a/psycopg/microprotocols.c
+++ b/psycopg/microprotocols.c
@@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn)
/* Convert to bytes. */
if (res && PyUnicode_CheckExact(res)) {
PyObject *b;
- const char *pyenc;
- pyenc = (conn && conn->pyenc) ? conn->pyenc : "utf8";
- b = PyUnicode_AsEncodedString(res, pyenc, NULL);
+ b = conn_encode(conn, res);
Py_DECREF(res);
res = b;
}
diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c
index c8d9c46..328a2b2 100644
--- a/psycopg/pqpath.c
+++ b/psycopg/pqpath.c
@@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
const char *err2 = NULL;
const char *code = NULL;
PyObject *pyerr = NULL;
+ PyObject *pgerror = NULL, *pgcode = NULL;
if (conn == NULL) {
PyErr_SetString(DatabaseError,
@@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
err2 = strip_severity(err);
Dprintf("pq_raise: err2=%s", err2);
+ /* decode the details of the error now, because after psyco_set_error
+ * decoding will fail.
+ */
+ if (!(pgerror = conn_text_from_chars(conn, err))) {
+ /* we can't really handle an exception while handling this error
+ * so just print it. */
+ PyErr_Print();
+ PyErr_Clear();
+ }
+
+ if (!(pgcode = conn_text_from_chars(conn, code))) {
+ PyErr_Print();
+ PyErr_Clear();
+ }
+
pyerr = psyco_set_error(exc, curs, err2);
if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) {
errorObject *perr = (errorObject *)pyerr;
- PyMem_Free(perr->pyenc);
- psycopg_strdup(&perr->pyenc, conn->pyenc, -1);
+ Py_CLEAR(perr->pydecoder);
+ Py_XINCREF(conn->pydecoder);
+ perr->pydecoder = conn->pydecoder;
Py_CLEAR(perr->pgerror);
- perr->pgerror = error_text_from_chars(perr, err);
+ perr->pgerror = pgerror;
+ pgerror = NULL;
Py_CLEAR(perr->pgcode);
- perr->pgcode = error_text_from_chars(perr, code);
+ perr->pgcode = pgcode;
+ pgcode = NULL;
CLEARPGRES(perr->pgres);
if (pgres && *pgres) {
@@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
*pgres = NULL;
}
}
+
+ Py_XDECREF(pgerror);
+ Py_XDECREF(pgcode);
}
/* pq_set_critical, pq_resolve_critical - manage critical errors
@@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs)
/* a file may return unicode if implements io.TextIOBase */
if (PyUnicode_Check(o)) {
PyObject *tmp;
- Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->pyenc);
- if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->pyenc, NULL))) {
+ if (!(tmp = conn_encode(curs->conn, o))) {
Dprintf("_pq_copy_in_v3: encoding() failed");
error = 1;
break;
@@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs)
if (len > 0 && buffer) {
if (is_text) {
- obj = PyUnicode_Decode(buffer, len, curs->conn->pyenc, NULL);
+ obj = conn_decode(curs->conn, buffer, len);
} else {
obj = Bytes_FromStringAndSize(buffer, len);
}
@@ -1638,7 +1659,7 @@ retry:
Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr);
if (repl->decode) {
- str = PyUnicode_Decode(buffer + hdr, data_size, conn->pyenc, NULL);
+ str = conn_decode(conn, buffer + hdr, data_size);
} else {
str = Bytes_FromStringAndSize(buffer + hdr, data_size);
}
diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h
index fc5b533..1367354 100644
--- a/psycopg/psycopg.h
+++ b/psycopg/psycopg.h
@@ -132,6 +132,8 @@ HIDDEN char *psycopg_escape_identifier(connectionObject *conn,
const char *str, Py_ssize_t len);
HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len);
HIDDEN int psycopg_is_text_file(PyObject *f);
+HIDDEN PyObject *psycopg_text_from_chars_safe(
+ const char *str, Py_ssize_t len, PyObject *decoder);
STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj);
diff --git a/psycopg/typecast.c b/psycopg/typecast.c
index d83c390..214d3f0 100644
--- a/psycopg/typecast.c
+++ b/psycopg/typecast.c
@@ -671,8 +671,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs)
#if PY_MAJOR_VERSION < 3
s = PyString_FromStringAndSize(str, len);
#else
- s = PyUnicode_Decode(str, len,
- ((cursorObject *)curs)->conn->pyenc, NULL);
+ s = conn_decode(((cursorObject *)curs)->conn, str, len);
#endif
}
else {
diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c
index d55820c..db6c5a9 100644
--- a/psycopg/typecast_basic.c
+++ b/psycopg/typecast_basic.c
@@ -98,12 +98,7 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
if (s == NULL) { Py_RETURN_NONE; }
conn = ((cursorObject*)curs)->conn;
- if (conn->cdecoder) {
- return conn->cdecoder(s, len, NULL);
- }
- else {
- return PyUnicode_Decode(s, len, conn->pyenc, NULL);
- }
+ return conn_decode(conn, s, len);
}
/** BOOLEAN - cast boolean value into right python object **/
diff --git a/psycopg/utils.c b/psycopg/utils.c
index 85ca9d6..7f6b6e6 100644
--- a/psycopg/utils.c
+++ b/psycopg/utils.c
@@ -278,3 +278,57 @@ exit:
return res;
}
+
+
+/* Convert a C string into Python Text using a specified codec.
+ *
+ * The codec is the python function codecs.getdecoder(enc). It is only used on
+ * Python 3 to return unicode: in Py2 the function returns a string.
+ *
+ * len is optional: use -1 to have it calculated by the function.
+ */
+PyObject *
+psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder)
+{
+#if PY_MAJOR_VERSION < 3
+
+ if (!str) { Py_RETURN_NONE; }
+
+ if (len < 0) { len = strlen(str); }
+
+ return PyString_FromStringAndSize(str, len);
+
+#else
+
+ static PyObject *replace = NULL;
+ PyObject *rv = NULL;
+ PyObject *b = NULL;
+ PyObject *t = NULL;
+
+ if (!str) { Py_RETURN_NONE; }
+
+ if (len < 0) { len = strlen(str); }
+
+ if (decoder) {
+ if (!replace) {
+ if (!(replace = PyUnicode_FromString("replace"))) { goto exit; }
+ }
+ if (!(b = PyBytes_FromStringAndSize(str, len))) { goto exit; }
+ if (!(t = PyObject_CallFunctionObjArgs(decoder, b, replace, NULL))) {
+ goto exit;
+ }
+
+ if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; }
+ Py_INCREF(rv);
+ }
+ else {
+ rv = PyUnicode_DecodeASCII(str, len, "replace");
+ }
+
+exit:
+ Py_XDECREF(t);
+ Py_XDECREF(b);
+ return rv;
+
+#endif
+}