summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniele Varrazzo <daniele.varrazzo@gmail.com>2019-01-19 15:32:46 +0000
committerDaniele Varrazzo <daniele.varrazzo@gmail.com>2019-01-19 15:32:46 +0000
commit5b08dc45b133d53bd930b4fcab7ffd638fd21f9c (patch)
tree06b1da786d9203b64042cae60f5762af6aa675e0
parent4a41c9a8cc6c95951adf1bbc34c319bc4c7fd5fa (diff)
parent49777de74cf3b9c7f369634020d39ddadc72bfaf (diff)
downloadpsycopg2-5b08dc45b133d53bd930b4fcab7ffd638fd21f9c.tar.gz
Merge remote-tracking branch 'origin/register-bytes'
Close #835
-rw-r--r--NEWS2
-rw-r--r--doc/src/extensions.rst9
-rw-r--r--doc/src/faq.rst13
-rw-r--r--doc/src/usage.rst18
-rw-r--r--lib/extensions.py8
-rw-r--r--psycopg/typecast_array.c1
-rw-r--r--psycopg/typecast_basic.c22
-rw-r--r--psycopg/typecast_builtins.c8
-rwxr-xr-xtests/test_quote.py11
-rwxr-xr-xtests/test_types_basic.py26
10 files changed, 100 insertions, 18 deletions
diff --git a/NEWS b/NEWS
index e5084ec..76b6b80 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,8 @@ New features:
- Added `~psycopg2.errors` module. Every PostgreSQL error is converted into
a specific exception class (:ticket:`#682`).
- Added `~psycopg2.extensions.encrypt_password()` function (:ticket:`#576`).
+- Added `~psycopg2.extensions.BYTES` typecaster to manage databases with mixed
+ encodings on Python 3 (:ticket:`#835`).
- Added `~psycopg2.extensions.Column.table_oid` and
`~psycopg2.extensions.Column.table_column` attributes on `cursor.description`
items (:ticket:`#661`).
diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst
index 9704b72..9e99ef1 100644
--- a/doc/src/extensions.rst
+++ b/doc/src/extensions.rst
@@ -947,6 +947,7 @@ Python objects. All the typecasters are automatically registered, except
from the database. See :ref:`unicode-handling` for details.
.. data:: BOOLEAN
+ BYTES
DATE
DECIMAL
FLOAT
@@ -963,6 +964,7 @@ from the database. See :ref:`unicode-handling` for details.
.. data:: BINARYARRAY
BOOLEANARRAY
+ BYTESARRAY
DATEARRAY
DATETIMEARRAY
DECIMALARRAY
@@ -1011,5 +1013,8 @@ from the database. See :ref:`unicode-handling` for details.
module. In older versions they can be imported from the implementation
module `!psycopg2._psycopg`.
-.. versionchanged:: 2.7.2
- added `!*DATETIMETZ*` objects.
+.. versionadded:: 2.7.2
+ the `!*DATETIMETZ*` objects.
+
+.. versionadded:: 2.8
+ the `!BYTES` and `!BYTESARRAY` objects.
diff --git a/doc/src/faq.rst b/doc/src/faq.rst
index 432e994..24dca21 100644
--- a/doc/src/faq.rst
+++ b/doc/src/faq.rst
@@ -108,6 +108,19 @@ My database is Unicode, but I receive all the strings as UTF-8 `!str`. Can I rec
See :ref:`unicode-handling` for the gory details.
+.. _faq-bytes:
+.. cssclass:: faq
+
+My database is in mixed encoding. My program was working on Python 2 but Python 3 fails decoding the strings. How do I avoid decoding?
+ From psycopg 2.8 you can use the following typecasters to always return bytes
+ from strings::
+
+ psycopg2.extensions.register_type(psycopg2.extensions.BYTES)
+ psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY)
+
+ See :ref:`unicode-handling` for an example.
+
+
.. _faq-float:
.. cssclass:: faq
diff --git a/doc/src/usage.rst b/doc/src/usage.rst
index e9416e3..08c6dce 100644
--- a/doc/src/usage.rst
+++ b/doc/src/usage.rst
@@ -457,13 +457,29 @@ the connection or globally: see the function
Unicode, you can register the related typecasters globally as soon as
Psycopg is imported::
- import psycopg2
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
and forget about this story.
+.. note::
+
+ In some cases, on Python 3, you may want to receive `!bytes` instead of
+ `!str`, without undergoing any decoding. This is especially the case if
+ the data in the database is in mixed encoding. The
+ `~psycopg2.extensions.BYTES` caster is what you need::
+
+ import psycopg2.extensions
+ psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn)
+ psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY, conn)
+ cur = conn.cursor()
+ cur.execute("select %s::text", (u"€",))
+ cur.fetchone()[0]
+ b'\xe2\x82\xac'
+
+ .. versionadded:: 2.8
+
.. index::
single: Buffer; Adaptation
diff --git a/lib/extensions.py b/lib/extensions.py
index 3e23906..1563575 100644
--- a/lib/extensions.py
+++ b/lib/extensions.py
@@ -35,10 +35,10 @@ This module holds all the extensions to the DBAPI-2.0 provided by psycopg.
import re as _re
from psycopg2._psycopg import ( # noqa
- BINARYARRAY, BOOLEAN, BOOLEANARRAY, DATE, DATEARRAY, DATETIMEARRAY,
- DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER, INTEGERARRAY,
- INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY, ROWIDARRAY,
- STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY,
+ BINARYARRAY, BOOLEAN, BOOLEANARRAY, BYTES, BYTESARRAY, DATE, DATEARRAY,
+ DATETIMEARRAY, DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER,
+ INTEGERARRAY, INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY,
+ ROWIDARRAY, STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY,
AsIs, Binary, Boolean, Float, Int, QuotedString, )
try:
diff --git a/psycopg/typecast_array.c b/psycopg/typecast_array.c
index 10c2b37..9cec95c 100644
--- a/psycopg/typecast_array.c
+++ b/psycopg/typecast_array.c
@@ -286,6 +286,7 @@ typecast_GENERIC_ARRAY_cast(const char *str, Py_ssize_t len, PyObject *curs)
#define typecast_DECIMALARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_STRINGARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_UNICODEARRAY_cast typecast_GENERIC_ARRAY_cast
+#define typecast_BYTESARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_BOOLEANARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATETIMEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATETIMETZARRAY_cast typecast_GENERIC_ARRAY_cast
diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c
index db6c5a9..ff4cf7b 100644
--- a/psycopg/typecast_basic.c
+++ b/psycopg/typecast_basic.c
@@ -75,18 +75,16 @@ typecast_FLOAT_cast(const char *s, Py_ssize_t len, PyObject *curs)
return flo;
}
-/** STRING - cast strings of any type to python string **/
-#if PY_MAJOR_VERSION < 3
+/** BYTES - cast strings of any type to python bytes **/
+
static PyObject *
-typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
+typecast_BYTES_cast(const char *s, Py_ssize_t len, PyObject *curs)
{
if (s == NULL) { Py_RETURN_NONE; }
- return PyString_FromStringAndSize(s, len);
+ return Bytes_FromStringAndSize(s, len);
}
-#else
-#define typecast_STRING_cast typecast_UNICODE_cast
-#endif
+
/** UNICODE - cast strings of any type to a python unicode object **/
@@ -101,6 +99,16 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
return conn_decode(conn, s, len);
}
+
+/** STRING - cast strings of any type to python string **/
+
+#if PY_MAJOR_VERSION < 3
+#define typecast_STRING_cast typecast_BYTES_cast
+#else
+#define typecast_STRING_cast typecast_UNICODE_cast
+#endif
+
+
/** BOOLEAN - cast boolean value into right python object **/
static PyObject *
diff --git a/psycopg/typecast_builtins.c b/psycopg/typecast_builtins.c
index 446dd14..0e4901d 100644
--- a/psycopg/typecast_builtins.c
+++ b/psycopg/typecast_builtins.c
@@ -3,7 +3,6 @@ static long int typecast_LONGINTEGER_types[] = {20, 0};
static long int typecast_INTEGER_types[] = {23, 21, 0};
static long int typecast_FLOAT_types[] = {701, 700, 0};
static long int typecast_DECIMAL_types[] = {1700, 0};
-static long int typecast_UNICODE_types[] = {19, 18, 25, 1042, 1043, 0};
static long int typecast_STRING_types[] = {19, 18, 25, 1042, 1043, 0};
static long int typecast_BOOLEAN_types[] = {16, 0};
static long int typecast_DATETIME_types[] = {1114, 0};
@@ -17,7 +16,6 @@ static long int typecast_LONGINTEGERARRAY_types[] = {1016, 0};
static long int typecast_INTEGERARRAY_types[] = {1005, 1006, 1007, 0};
static long int typecast_FLOATARRAY_types[] = {1021, 1022, 0};
static long int typecast_DECIMALARRAY_types[] = {1231, 0};
-static long int typecast_UNICODEARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0};
static long int typecast_STRINGARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0};
static long int typecast_BOOLEANARRAY_types[] = {1000, 0};
static long int typecast_DATETIMEARRAY_types[] = {1115, 0};
@@ -39,7 +37,8 @@ static typecastObject_initlist typecast_builtins[] = {
{"INTEGER", typecast_INTEGER_types, typecast_INTEGER_cast, NULL},
{"FLOAT", typecast_FLOAT_types, typecast_FLOAT_cast, NULL},
{"DECIMAL", typecast_DECIMAL_types, typecast_DECIMAL_cast, NULL},
- {"UNICODE", typecast_UNICODE_types, typecast_UNICODE_cast, NULL},
+ {"UNICODE", typecast_STRING_types, typecast_UNICODE_cast, NULL},
+ {"BYTES", typecast_STRING_types, typecast_BYTES_cast, NULL},
{"STRING", typecast_STRING_types, typecast_STRING_cast, NULL},
{"BOOLEAN", typecast_BOOLEAN_types, typecast_BOOLEAN_cast, NULL},
{"DATETIME", typecast_DATETIME_types, typecast_DATETIME_cast, NULL},
@@ -53,7 +52,8 @@ static typecastObject_initlist typecast_builtins[] = {
{"INTEGERARRAY", typecast_INTEGERARRAY_types, typecast_INTEGERARRAY_cast, "INTEGER"},
{"FLOATARRAY", typecast_FLOATARRAY_types, typecast_FLOATARRAY_cast, "FLOAT"},
{"DECIMALARRAY", typecast_DECIMALARRAY_types, typecast_DECIMALARRAY_cast, "DECIMAL"},
- {"UNICODEARRAY", typecast_UNICODEARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"},
+ {"UNICODEARRAY", typecast_STRINGARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"},
+ {"BYTESARRAY", typecast_STRINGARRAY_types, typecast_BYTESARRAY_cast, "BYTES"},
{"STRINGARRAY", typecast_STRINGARRAY_types, typecast_STRINGARRAY_cast, "STRING"},
{"BOOLEANARRAY", typecast_BOOLEANARRAY_types, typecast_BOOLEANARRAY_cast, "BOOLEAN"},
{"DATETIMEARRAY", typecast_DATETIMEARRAY_types, typecast_DATETIMEARRAY_cast, "DATETIME"},
diff --git a/tests/test_quote.py b/tests/test_quote.py
index efb4764..b4679cd 100755
--- a/tests/test_quote.py
+++ b/tests/test_quote.py
@@ -170,6 +170,17 @@ class QuotingTestCase(ConnectingTestCase):
self.assertEqual(res, data)
self.assert_(not self.conn.notices)
+ def test_bytes(self):
+ snowman = u"\u2603"
+ conn = self.connect()
+ conn.set_client_encoding('UNICODE')
+ psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn)
+ curs = conn.cursor()
+ curs.execute("select %s::text", (snowman,))
+ x = curs.fetchone()[0]
+ self.assert_(isinstance(x, bytes))
+ self.assertEqual(x, snowman.encode('utf8'))
+
class TestQuotedString(ConnectingTestCase):
def test_encoding_from_conn(self):
diff --git a/tests/test_types_basic.py b/tests/test_types_basic.py
index 9be4ac2..b5660b6 100755
--- a/tests/test_types_basic.py
+++ b/tests/test_types_basic.py
@@ -32,6 +32,7 @@ import unittest
from .testutils import ConnectingTestCase, long
import psycopg2
+from psycopg2.compat import text_type
class TypesBasicTests(ConnectingTestCase):
@@ -208,6 +209,31 @@ class TypesBasicTests(ConnectingTestCase):
self.assertRaises(psycopg2.DataError,
psycopg2.extensions.STRINGARRAY, s.encode('utf8'), curs)
+ def testTextArray(self):
+ curs = self.conn.cursor()
+ curs.execute("select '{a,b,c}'::text[]")
+ x = curs.fetchone()[0]
+ self.assert_(isinstance(x[0], str))
+ self.assertEqual(x, ['a', 'b', 'c'])
+
+ def testUnicodeArray(self):
+ psycopg2.extensions.register_type(
+ psycopg2.extensions.UNICODEARRAY, self.conn)
+ curs = self.conn.cursor()
+ curs.execute("select '{a,b,c}'::text[]")
+ x = curs.fetchone()[0]
+ self.assert_(isinstance(x[0], text_type))
+ self.assertEqual(x, [u'a', u'b', u'c'])
+
+ def testBytesArray(self):
+ psycopg2.extensions.register_type(
+ psycopg2.extensions.BYTESARRAY, self.conn)
+ curs = self.conn.cursor()
+ curs.execute("select '{a,b,c}'::text[]")
+ x = curs.fetchone()[0]
+ self.assert_(isinstance(x[0], bytes))
+ self.assertEqual(x, [b'a', b'b', b'c'])
+
@testutils.skip_before_postgres(8, 2)
def testArrayOfNulls(self):
curs = self.conn.cursor()