diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-01-19 15:32:46 +0000 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-01-19 15:32:46 +0000 |
commit | 5b08dc45b133d53bd930b4fcab7ffd638fd21f9c (patch) | |
tree | 06b1da786d9203b64042cae60f5762af6aa675e0 | |
parent | 4a41c9a8cc6c95951adf1bbc34c319bc4c7fd5fa (diff) | |
parent | 49777de74cf3b9c7f369634020d39ddadc72bfaf (diff) | |
download | psycopg2-5b08dc45b133d53bd930b4fcab7ffd638fd21f9c.tar.gz |
Merge remote-tracking branch 'origin/register-bytes'
Close #835
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | doc/src/extensions.rst | 9 | ||||
-rw-r--r-- | doc/src/faq.rst | 13 | ||||
-rw-r--r-- | doc/src/usage.rst | 18 | ||||
-rw-r--r-- | lib/extensions.py | 8 | ||||
-rw-r--r-- | psycopg/typecast_array.c | 1 | ||||
-rw-r--r-- | psycopg/typecast_basic.c | 22 | ||||
-rw-r--r-- | psycopg/typecast_builtins.c | 8 | ||||
-rwxr-xr-x | tests/test_quote.py | 11 | ||||
-rwxr-xr-x | tests/test_types_basic.py | 26 |
10 files changed, 100 insertions, 18 deletions
@@ -9,6 +9,8 @@ New features: - Added `~psycopg2.errors` module. Every PostgreSQL error is converted into a specific exception class (:ticket:`#682`). - Added `~psycopg2.extensions.encrypt_password()` function (:ticket:`#576`). +- Added `~psycopg2.extensions.BYTES` adapter to manage databases with mixed + encodings on Python 3 (:ticket:`#835`). - Added `~psycopg2.extensions.Column.table_oid` and `~psycopg2.extensions.Column.table_column` attributes on `cursor.description` items (:ticket:`#661`). diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst index 9704b72..9e99ef1 100644 --- a/doc/src/extensions.rst +++ b/doc/src/extensions.rst @@ -947,6 +947,7 @@ Python objects. All the typecasters are automatically registered, except from the database. See :ref:`unicode-handling` for details. .. data:: BOOLEAN + BYTES DATE DECIMAL FLOAT @@ -963,6 +964,7 @@ from the database. See :ref:`unicode-handling` for details. .. data:: BINARYARRAY BOOLEANARRAY + BYTESARRAY DATEARRAY DATETIMEARRAY DECIMALARRAY @@ -1011,5 +1013,8 @@ from the database. See :ref:`unicode-handling` for details. module. In older versions they can be imported from the implementation module `!psycopg2._psycopg`. -.. versionchanged:: 2.7.2 - added `!*DATETIMETZ*` objects. +.. versionadded:: 2.7.2 + the `!*DATETIMETZ*` objects. + +.. versionadded:: 2.8 + the `!BYTES` and `BYTESARRAY` objects. diff --git a/doc/src/faq.rst b/doc/src/faq.rst index 432e994..24dca21 100644 --- a/doc/src/faq.rst +++ b/doc/src/faq.rst @@ -108,6 +108,19 @@ My database is Unicode, but I receive all the strings as UTF-8 `!str`. Can I rec See :ref:`unicode-handling` for the gory details. +.. _faq-bytes: +.. cssclass:: faq + +My database is in mixed encoding. My program was working on Python 2 but Python 3 fails decoding the strings. How do I avoid decoding? 
+ From psycopg 2.8 you can use the following adapters to always return bytes + from strings:: + + psycopg2.extensions.register_type(psycopg2.extensions.BYTES) + psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY) + + See :ref:`unicode-handling` for an example. + + .. _faq-float: .. cssclass:: faq diff --git a/doc/src/usage.rst b/doc/src/usage.rst index e9416e3..08c6dce 100644 --- a/doc/src/usage.rst +++ b/doc/src/usage.rst @@ -457,13 +457,29 @@ the connection or globally: see the function Unicode, you can register the related typecasters globally as soon as Psycopg is imported:: - import psycopg2 import psycopg2.extensions psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) and forget about this story. +.. note:: + + In some cases, on Python 3, you may want to receive `!bytes` instead of + `!str`, without undergoing any decoding. This is especially the case if + the data in the database is in mixed encoding. The + `~psycopg2.extensions.BYTES` caster is what you need:: + + import psycopg2.extensions + psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn) + psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY, conn) + cur = conn.cursor() + cur.execute("select %s::text", (u"€",)) + cur.fetchone()[0] + b'\xe2\x82\xac' + + .. versionadded:: 2.8 + .. index:: single: Buffer; Adaptation diff --git a/lib/extensions.py b/lib/extensions.py index 3e23906..1563575 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -35,10 +35,10 @@ This module holds all the extensions to the DBAPI-2.0 provided by psycopg. 
import re as _re from psycopg2._psycopg import ( # noqa - BINARYARRAY, BOOLEAN, BOOLEANARRAY, DATE, DATEARRAY, DATETIMEARRAY, - DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER, INTEGERARRAY, - INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY, ROWIDARRAY, - STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY, + BINARYARRAY, BOOLEAN, BOOLEANARRAY, BYTES, BYTESARRAY, DATE, DATEARRAY, + DATETIMEARRAY, DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER, + INTEGERARRAY, INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY, + ROWIDARRAY, STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY, AsIs, Binary, Boolean, Float, Int, QuotedString, ) try: diff --git a/psycopg/typecast_array.c b/psycopg/typecast_array.c index 10c2b37..9cec95c 100644 --- a/psycopg/typecast_array.c +++ b/psycopg/typecast_array.c @@ -286,6 +286,7 @@ typecast_GENERIC_ARRAY_cast(const char *str, Py_ssize_t len, PyObject *curs) #define typecast_DECIMALARRAY_cast typecast_GENERIC_ARRAY_cast #define typecast_STRINGARRAY_cast typecast_GENERIC_ARRAY_cast #define typecast_UNICODEARRAY_cast typecast_GENERIC_ARRAY_cast +#define typecast_BYTESARRAY_cast typecast_GENERIC_ARRAY_cast #define typecast_BOOLEANARRAY_cast typecast_GENERIC_ARRAY_cast #define typecast_DATETIMEARRAY_cast typecast_GENERIC_ARRAY_cast #define typecast_DATETIMETZARRAY_cast typecast_GENERIC_ARRAY_cast diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c index db6c5a9..ff4cf7b 100644 --- a/psycopg/typecast_basic.c +++ b/psycopg/typecast_basic.c @@ -75,18 +75,16 @@ typecast_FLOAT_cast(const char *s, Py_ssize_t len, PyObject *curs) return flo; } -/** STRING - cast strings of any type to python string **/ -#if PY_MAJOR_VERSION < 3 +/** BYTES - cast strings of any type to python bytes **/ + static PyObject * -typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs) +typecast_BYTES_cast(const char *s, Py_ssize_t len, PyObject *curs) { if (s == NULL) { Py_RETURN_NONE; } - return PyString_FromStringAndSize(s, len); + 
return Bytes_FromStringAndSize(s, len); } -#else -#define typecast_STRING_cast typecast_UNICODE_cast -#endif + /** UNICODE - cast strings of any type to a python unicode object **/ @@ -101,6 +99,16 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs) return conn_decode(conn, s, len); } + +/** STRING - cast strings of any type to python string **/ + +#if PY_MAJOR_VERSION < 3 +#define typecast_STRING_cast typecast_BYTES_cast +#else +#define typecast_STRING_cast typecast_UNICODE_cast +#endif + + /** BOOLEAN - cast boolean value into right python object **/ static PyObject * diff --git a/psycopg/typecast_builtins.c b/psycopg/typecast_builtins.c index 446dd14..0e4901d 100644 --- a/psycopg/typecast_builtins.c +++ b/psycopg/typecast_builtins.c @@ -3,7 +3,6 @@ static long int typecast_LONGINTEGER_types[] = {20, 0}; static long int typecast_INTEGER_types[] = {23, 21, 0}; static long int typecast_FLOAT_types[] = {701, 700, 0}; static long int typecast_DECIMAL_types[] = {1700, 0}; -static long int typecast_UNICODE_types[] = {19, 18, 25, 1042, 1043, 0}; static long int typecast_STRING_types[] = {19, 18, 25, 1042, 1043, 0}; static long int typecast_BOOLEAN_types[] = {16, 0}; static long int typecast_DATETIME_types[] = {1114, 0}; @@ -17,7 +16,6 @@ static long int typecast_LONGINTEGERARRAY_types[] = {1016, 0}; static long int typecast_INTEGERARRAY_types[] = {1005, 1006, 1007, 0}; static long int typecast_FLOATARRAY_types[] = {1021, 1022, 0}; static long int typecast_DECIMALARRAY_types[] = {1231, 0}; -static long int typecast_UNICODEARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0}; static long int typecast_STRINGARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0}; static long int typecast_BOOLEANARRAY_types[] = {1000, 0}; static long int typecast_DATETIMEARRAY_types[] = {1115, 0}; @@ -39,7 +37,8 @@ static typecastObject_initlist typecast_builtins[] = { {"INTEGER", typecast_INTEGER_types, typecast_INTEGER_cast, NULL}, {"FLOAT", typecast_FLOAT_types, 
typecast_FLOAT_cast, NULL}, {"DECIMAL", typecast_DECIMAL_types, typecast_DECIMAL_cast, NULL}, - {"UNICODE", typecast_UNICODE_types, typecast_UNICODE_cast, NULL}, + {"UNICODE", typecast_STRING_types, typecast_UNICODE_cast, NULL}, + {"BYTES", typecast_STRING_types, typecast_BYTES_cast, NULL}, {"STRING", typecast_STRING_types, typecast_STRING_cast, NULL}, {"BOOLEAN", typecast_BOOLEAN_types, typecast_BOOLEAN_cast, NULL}, {"DATETIME", typecast_DATETIME_types, typecast_DATETIME_cast, NULL}, @@ -53,7 +52,8 @@ static typecastObject_initlist typecast_builtins[] = { {"INTEGERARRAY", typecast_INTEGERARRAY_types, typecast_INTEGERARRAY_cast, "INTEGER"}, {"FLOATARRAY", typecast_FLOATARRAY_types, typecast_FLOATARRAY_cast, "FLOAT"}, {"DECIMALARRAY", typecast_DECIMALARRAY_types, typecast_DECIMALARRAY_cast, "DECIMAL"}, - {"UNICODEARRAY", typecast_UNICODEARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"}, + {"UNICODEARRAY", typecast_STRINGARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"}, + {"BYTESARRAY", typecast_STRINGARRAY_types, typecast_BYTESARRAY_cast, "BYTES"}, {"STRINGARRAY", typecast_STRINGARRAY_types, typecast_STRINGARRAY_cast, "STRING"}, {"BOOLEANARRAY", typecast_BOOLEANARRAY_types, typecast_BOOLEANARRAY_cast, "BOOLEAN"}, {"DATETIMEARRAY", typecast_DATETIMEARRAY_types, typecast_DATETIMEARRAY_cast, "DATETIME"}, diff --git a/tests/test_quote.py b/tests/test_quote.py index efb4764..b4679cd 100755 --- a/tests/test_quote.py +++ b/tests/test_quote.py @@ -170,6 +170,17 @@ class QuotingTestCase(ConnectingTestCase): self.assertEqual(res, data) self.assert_(not self.conn.notices) + def test_bytes(self): + snowman = u"\u2603" + conn = self.connect() + conn.set_client_encoding('UNICODE') + psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn) + curs = conn.cursor() + curs.execute("select %s::text", (snowman,)) + x = curs.fetchone()[0] + self.assert_(isinstance(x, bytes)) + self.assertEqual(x, snowman.encode('utf8')) + class TestQuotedString(ConnectingTestCase): def 
test_encoding_from_conn(self): diff --git a/tests/test_types_basic.py b/tests/test_types_basic.py index 9be4ac2..b5660b6 100755 --- a/tests/test_types_basic.py +++ b/tests/test_types_basic.py @@ -32,6 +32,7 @@ import unittest from .testutils import ConnectingTestCase, long import psycopg2 +from psycopg2.compat import text_type class TypesBasicTests(ConnectingTestCase): @@ -208,6 +209,31 @@ class TypesBasicTests(ConnectingTestCase): self.assertRaises(psycopg2.DataError, psycopg2.extensions.STRINGARRAY, s.encode('utf8'), curs) + def testTextArray(self): + curs = self.conn.cursor() + curs.execute("select '{a,b,c}'::text[]") + x = curs.fetchone()[0] + self.assert_(isinstance(x[0], str)) + self.assertEqual(x, ['a', 'b', 'c']) + + def testUnicodeArray(self): + psycopg2.extensions.register_type( + psycopg2.extensions.UNICODEARRAY, self.conn) + curs = self.conn.cursor() + curs.execute("select '{a,b,c}'::text[]") + x = curs.fetchone()[0] + self.assert_(isinstance(x[0], text_type)) + self.assertEqual(x, [u'a', u'b', u'c']) + + def testBytesArray(self): + psycopg2.extensions.register_type( + psycopg2.extensions.BYTESARRAY, self.conn) + curs = self.conn.cursor() + curs.execute("select '{a,b,c}'::text[]") + x = curs.fetchone()[0] + self.assert_(isinstance(x[0], bytes)) + self.assertEqual(x, [b'a', b'b', b'c']) + @testutils.skip_before_postgres(8, 2) def testArrayOfNulls(self): curs = self.conn.cursor() |