summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Hupp <adam@hupp.org>2017-12-04 11:55:27 -0800
committerAdam Hupp <adam@hupp.org>2017-12-04 11:55:27 -0800
commita0b9f316fda16b21923bb57e1de9a98789befbba (patch)
treebdd5da3016316b3c0d788939258aaf60f4135c7b
parent10e20995b5f4b8f8131b7e69912882bb81393cb2 (diff)
downloadpython-magic-a0b9f316fda16b21923bb57e1de9a98789befbba.tar.gz
Merge in compatibility mode with libmagic
The libmagic distribution uses the same package name `magic` as python-magic, but has an incompatible API. This change merges in a copy of libmagic's bindings, wrapped to give deprecation warnings. This is intended to a) mitigate the short-term pain to users and packagers who need to figure out which to use, and b) give us a path to merging the two sets of bindings. I'd be happy for libmagic to take over this package if we could find a path to it.
-rw-r--r--LICENSE37
-rw-r--r--README.md9
-rw-r--r--magic/__init__.py47
-rw-r--r--magic/compat.py285
-rw-r--r--test/libmagic_test.py35
-rw-r--r--test/run.py1
6 files changed, 404 insertions, 10 deletions
diff --git a/LICENSE b/LICENSE
index 044612d..b8ca4b9 100644
--- a/LICENSE
+++ b/LICENSE
@@ -19,3 +19,40 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+
+
+====
+
+Portions of this package (magic/compat.py and test/libmagic_test.py)
+are distributed under the following copyright notice:
+
+
+$File: LEGAL.NOTICE,v 1.15 2006/05/03 18:48:33 christos Exp $
+Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
+Software written by Ian F. Darwin and others;
+maintained 1994- Christos Zoulas.
+
+This software is not subject to any export provision of the United States
+Department of Commerce, and may be exported to any country or planet.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice immediately at the beginning of the file, without modification,
+ this list of conditions, and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/README.md b/README.md
index fa33f65..044752c 100644
--- a/README.md
+++ b/README.md
@@ -113,14 +113,6 @@ To run against a specific python version:
Minor version bumps should be backwards compatible. Major bumps are not.
-## Name Conflict
-
-There are, sadly, two libraries which use the module name `magic`.
-Both have been around for quite a while. If you are using this module
-and get an error using a method like `open`, your code is expecting
-the other one. Hopefully one day these will be reconciled.
-
-
## Author
Written by Adam Hupp in 2001 for a project that never got off the
@@ -151,4 +143,3 @@ python-magic is distributed under the MIT license. See the included
LICENSE file for details.
I am providing code in the repository to you under an open source license. Because this is my personal repository, the license you receive to my code is from me and not my employer (Facebook).
-
diff --git a/magic/__init__.py b/magic/__init__.py
index fae6620..6ccc8d9 100644
--- a/magic/__init__.py
+++ b/magic/__init__.py
@@ -21,6 +21,7 @@ import glob
import ctypes
import ctypes.util
import threading
+import logging
from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER
@@ -113,8 +114,10 @@ class Magic:
def from_file(self, filename):
# raise FileNotFoundException or IOError if the file does not exist
- with open(filename):
+ # use __builtins__ because the compat stuff at the bottom shadows the builtin open
+ with __builtins__['open'](filename):
pass
+
with self.lock:
try:
return maybe_decode(magic_file(self.cookie, filename))
@@ -435,3 +438,45 @@ MAGIC_PARAM_ELF_SHNUM_MAX = 3 # Max ELF program sections processed
MAGIC_PARAM_ELF_NOTES_MAX = 4 # # Max ELF sections processed
MAGIC_PARAM_REGEX_MAX = 5 # Length limit for regex searches
MAGIC_PARAM_BYTES_MAX = 6 # Max number of bytes to read from file
+
+# This package name conflicts with the one provided by upstream
+# libmagic. This is a common source of confusion for users. To
+# resolve this, we ship a copy of that module and expose its functions
+# wrapped in deprecation warnings.
+def add_compat(to_module):
+
+ import warnings, re
+ from magic import compat
+
+ def deprecation_wrapper(compat, fn, alternate):
+ def _(*args, **kwargs):
+ warnings.warn(
+ "Using compatability mode with libmagic's python binding",
+ DeprecationWarning)
+
+ return compat[fn](*args, **kwargs)
+ return _
+
+ fn = [('detect_from_filename', 'magic.from_file'),
+ ('detect_from_content', 'magic.from_buffer'),
+ ('detect_from_fobj', 'magic.Magic.from_open_file'),
+ ('open', 'magic.Magic')]
+ for (fname, alternate) in fn:
+ to_module[fname] = deprecation_wrapper(compat.__dict__, fname, alternate)
+
+ # copy constants over, ensuring there's no conflicts
+ is_const_re = re.compile("^[A-Z_]+$")
+ allowed_inconsistent = set(['MAGIC_MIME'])
+ for name, value in compat.__dict__.items():
+ if is_const_re.match(name):
+ if name in to_module:
+ if name in allowed_inconsistent:
+ continue
+ if to_module[name] != value:
+ raise Exception("inconsistent value for " + name)
+ else:
+ continue
+ else:
+ to_module[name] = value
+
+add_compat(globals())
diff --git a/magic/compat.py b/magic/compat.py
new file mode 100644
index 0000000..662569e
--- /dev/null
+++ b/magic/compat.py
@@ -0,0 +1,285 @@
+# coding: utf-8
+
+'''
+Python bindings for libmagic
+'''
+
+import ctypes
+
+from collections import namedtuple
+
+from ctypes import *
+from ctypes.util import find_library
+
+
+def _init():
+ """
+ Loads the shared library through ctypes and returns a library
+ L{ctypes.CDLL} instance
+ """
+ return ctypes.cdll.LoadLibrary(find_library('magic'))
+
+_libraries = {}
+_libraries['magic'] = _init()
+
+# Flag constants for open and setflags
+MAGIC_NONE = NONE = 0
+MAGIC_DEBUG = DEBUG = 1
+MAGIC_SYMLINK = SYMLINK = 2
+MAGIC_COMPRESS = COMPRESS = 4
+MAGIC_DEVICES = DEVICES = 8
+MAGIC_MIME_TYPE = MIME_TYPE = 16
+MAGIC_CONTINUE = CONTINUE = 32
+MAGIC_CHECK = CHECK = 64
+MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
+MAGIC_RAW = RAW = 256
+MAGIC_ERROR = ERROR = 512
+MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
+MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING
+MAGIC_APPLE = APPLE = 2048
+
+MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
+MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
+MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
+MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
+MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
+MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
+MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
+MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
+MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
+
+MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
+
+FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
+
+
+class magic_set(Structure):
+ pass
+magic_set._fields_ = []
+magic_t = POINTER(magic_set)
+
+_open = _libraries['magic'].magic_open
+_open.restype = magic_t
+_open.argtypes = [c_int]
+
+_close = _libraries['magic'].magic_close
+_close.restype = None
+_close.argtypes = [magic_t]
+
+_file = _libraries['magic'].magic_file
+_file.restype = c_char_p
+_file.argtypes = [magic_t, c_char_p]
+
+_descriptor = _libraries['magic'].magic_descriptor
+_descriptor.restype = c_char_p
+_descriptor.argtypes = [magic_t, c_int]
+
+_buffer = _libraries['magic'].magic_buffer
+_buffer.restype = c_char_p
+_buffer.argtypes = [magic_t, c_void_p, c_size_t]
+
+_error = _libraries['magic'].magic_error
+_error.restype = c_char_p
+_error.argtypes = [magic_t]
+
+_setflags = _libraries['magic'].magic_setflags
+_setflags.restype = c_int
+_setflags.argtypes = [magic_t, c_int]
+
+_load = _libraries['magic'].magic_load
+_load.restype = c_int
+_load.argtypes = [magic_t, c_char_p]
+
+_compile = _libraries['magic'].magic_compile
+_compile.restype = c_int
+_compile.argtypes = [magic_t, c_char_p]
+
+_check = _libraries['magic'].magic_check
+_check.restype = c_int
+_check.argtypes = [magic_t, c_char_p]
+
+_list = _libraries['magic'].magic_list
+_list.restype = c_int
+_list.argtypes = [magic_t, c_char_p]
+
+_errno = _libraries['magic'].magic_errno
+_errno.restype = c_int
+_errno.argtypes = [magic_t]
+
+
+class Magic(object):
+ def __init__(self, ms):
+ self._magic_t = ms
+
+ def close(self):
+ """
+ Closes the magic database and deallocates any resources used.
+ """
+ _close(self._magic_t)
+
+ @staticmethod
+ def __tostr(s):
+ if s is None:
+ return None
+ if isinstance(s, str):
+ return s
+ try: # keep Python 2 compatibility
+ return str(s, 'utf-8')
+ except TypeError:
+ return str(s)
+
+ @staticmethod
+ def __tobytes(b):
+ if b is None:
+ return None
+ if isinstance(b, bytes):
+ return b
+ try: # keep Python 2 compatibility
+ return bytes(b, 'utf-8')
+ except TypeError:
+ return bytes(b)
+
+ def file(self, filename):
+ """
+ Returns a textual description of the contents of the argument passed
+ as a filename or None if an error occurred and the MAGIC_ERROR flag
+ is set. A call to errno() will return the numeric error code.
+ """
+ return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
+
+ def descriptor(self, fd):
+ """
+ Returns a textual description of the contents of the argument passed
+ as a file descriptor or None if an error occurred and the MAGIC_ERROR
+ flag is set. A call to errno() will return the numeric error code.
+ """
+ return Magic.__tostr(_descriptor(self._magic_t, fd))
+
+ def buffer(self, buf):
+ """
+ Returns a textual description of the contents of the argument passed
+ as a buffer or None if an error occurred and the MAGIC_ERROR flag
+ is set. A call to errno() will return the numeric error code.
+ """
+ return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))
+
+ def error(self):
+ """
+ Returns a textual explanation of the last error or None
+ if there was no error.
+ """
+ return Magic.__tostr(_error(self._magic_t))
+
+ def setflags(self, flags):
+ """
+ Set flags on the magic object which determine how magic checking
+ behaves; a bitwise OR of the flags described in libmagic(3), but
+ without the MAGIC_ prefix.
+
+ Returns -1 on systems that don't support utime(2) or utimes(2)
+ when PRESERVE_ATIME is set.
+ """
+ return _setflags(self._magic_t, flags)
+
+ def load(self, filename=None):
+ """
+ Must be called to load entries in the colon separated list of database
+ files passed as argument or the default database file if no argument
+ before any magic queries can be performed.
+
+ Returns 0 on success and -1 on failure.
+ """
+ return _load(self._magic_t, Magic.__tobytes(filename))
+
+ def compile(self, dbs):
+ """
+ Compile entries in the colon separated list of database files
+ passed as argument or the default database file if no argument.
+ The compiled files created are named from the basename(1) of each file
+ argument with ".mgc" appended to it.
+
+ Returns 0 on success and -1 on failure.
+ """
+ return _compile(self._magic_t, Magic.__tobytes(dbs))
+
+ def check(self, dbs):
+ """
+ Check the validity of entries in the colon separated list of
+ database files passed as argument or the default database file
+ if no argument.
+
+ Returns 0 on success and -1 on failure.
+ """
+ return _check(self._magic_t, Magic.__tobytes(dbs))
+
+ def list(self, dbs):
+ """
+ Dump, in a human readable format, all magic entries in the colon
+ separated list of database files passed as argument or the default
+ database file if no argument.
+
+ Returns 0 on success and -1 on failure.
+ """
+ return _list(self._magic_t, Magic.__tobytes(dbs))
+
+ def errno(self):
+ """
+ Returns a numeric error code. If return value is 0, an internal
+ magic error occurred. If return value is non-zero, the value is
+ an OS error code. The errno module or os.strerror() can be used
+ to provide detailed error information.
+ """
+ return _errno(self._magic_t)
+
+
+def open(flags):
+ """
+ Returns a magic object on success and None on failure.
+ Flags argument as for setflags.
+ """
+ return Magic(_open(flags))
+
+
+# Objects used by `detect_from_` functions
+mime_magic = Magic(_open(MAGIC_MIME))
+mime_magic.load()
+none_magic = Magic(_open(MAGIC_NONE))
+none_magic.load()
+
+
+def _create_filemagic(mime_detected, type_detected):
+ mime_type, mime_encoding = mime_detected.split('; ')
+
+ return FileMagic(name=type_detected, mime_type=mime_type,
+ encoding=mime_encoding.replace('charset=', ''))
+
+
+def detect_from_filename(filename):
+ '''Detect mime type, encoding and file type from a filename
+
+ Returns a `FileMagic` namedtuple.
+ '''
+
+ return _create_filemagic(mime_magic.file(filename),
+ none_magic.file(filename))
+
+
+def detect_from_fobj(fobj):
+ '''Detect mime type, encoding and file type from file-like object
+
+ Returns a `FileMagic` namedtuple.
+ '''
+
+ file_descriptor = fobj.fileno()
+ return _create_filemagic(mime_magic.descriptor(file_descriptor),
+ none_magic.descriptor(file_descriptor))
+
+
+def detect_from_content(byte_content):
+ '''Detect mime type, encoding and file type from bytes
+
+ Returns a `FileMagic` namedtuple.
+ '''
+
+ return _create_filemagic(mime_magic.buffer(byte_content),
+ none_magic.buffer(byte_content))
diff --git a/test/libmagic_test.py b/test/libmagic_test.py
new file mode 100644
index 0000000..5a0a290
--- /dev/null
+++ b/test/libmagic_test.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+
+import unittest
+
+import magic
+
+
+class MagicTestCase(unittest.TestCase):
+
+ filename = 'test/testdata/test.pdf'
+ expected_mime_type = 'application/pdf'
+ expected_encoding = 'us-ascii'
+ expected_name = 'PDF document, version 1.2'
+
+ def assert_result(self, result):
+ self.assertEqual(result.mime_type, self.expected_mime_type)
+ self.assertEqual(result.encoding, self.expected_encoding)
+ self.assertEqual(result.name, self.expected_name)
+
+ def test_detect_from_filename(self):
+ result = magic.detect_from_filename(self.filename)
+ self.assert_result(result)
+
+ def test_detect_from_fobj(self):
+ with open(self.filename) as fobj:
+ result = magic.detect_from_fobj(fobj)
+ self.assert_result(result)
+
+ def test_detect_from_content(self):
+ with open(self.filename) as fobj:
+ result = magic.detect_from_content(fobj.read(4096))
+ self.assert_result(result)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/run.py b/test/run.py
index c10c11f..6b37555 100644
--- a/test/run.py
+++ b/test/run.py
@@ -24,6 +24,7 @@ def run_test(versions):
found = True
print("Testing %s" % i)
subprocess.run([i, os.path.join(this_dir, "test.py")], env=new_env, check=True)
+ subprocess.run([i, os.path.join(this_dir, "libmagic_test.py")], env=new_env, check=True)
if not found:
sys.exit("No versions found: " + str(versions))