From bec0d4428f6de11ffd90d19dfddb5af9c72d9c63 Mon Sep 17 00:00:00 2001 From: Adam Hupp Date: Mon, 20 Nov 2017 09:21:00 -0800 Subject: python3 str handling In python3, a str is passed by ctypes as wchar*. This means the layout of the string that libmagic receives looks like [ascii, null, ascii, null, etc]. For some reason, magic handles this just fine most of the time, but it's clearly wrong and I'm amazed it worked at all. --- magic.py | 5 +++++ test/run.sh | 4 +++- test/test.py | 7 +++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/magic.py b/magic.py index a97e929..8637c95 100644 --- a/magic.py +++ b/magic.py @@ -72,6 +72,11 @@ class Magic: """ with self.lock: try: + # if we're on python3, convert buf to bytes + # otherwise this string is passed as wchar* + # which is not what libmagic expects + if type(buf) == str and str != bytes: + buf = buf.encode('utf-8', errors='replace') return maybe_decode(magic_buffer(self.cookie, buf)) except MagicException as e: return self._handle509Bug(e) diff --git a/test/run.sh b/test/run.sh index 3768497..2764e15 100755 --- a/test/run.sh +++ b/test/run.sh @@ -1,12 +1,14 @@ #!/bin/sh -set -e # ensure we can use unicode filenames in the test export LC_ALL=en_US.UTF-8 THISDIR=`dirname $0` export PYTHONPATH=${THISDIR}/.. 
+echo "python2.6" python2.6 ${THISDIR}/test.py +echo "python2.7" python2.7 ${THISDIR}/test.py +echo "python3.0" python3 ${THISDIR}/test.py diff --git a/test/test.py b/test/test.py index 6ae5c07..78ddb2b 100755 --- a/test/test.py +++ b/test/test.py @@ -30,6 +30,13 @@ class MagicTest(unittest.TestCase): break else: self.assertTrue(False, "no match for " + repr(expected_value)) + + def test_from_buffer_str_and_bytes(self): + m = magic.Magic(mime=True) + s = '#!/usr/bin/env python\nprint("foo")' + self.assertEqual("text/x-python", m.from_buffer(s)) + b = b'#!/usr/bin/env python\nprint("foo")' + self.assertEqual("text/x-python", m.from_buffer(b)) def test_mime_types(self): dest = os.path.join(MagicTest.TESTDATA_DIR, b'\xce\xbb'.decode('utf-8')) -- cgit v1.2.1