diff options
author | Leonard Richardson <leonardr@segfault.org> | 2015-07-03 09:20:54 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2015-07-03 09:20:54 -0400 |
commit | a0ce180357b0f33002ee92d37cc3b07e8280da82 (patch) | |
tree | 1f63029b3d3e2e024c35c783b142fc4b24ce02a2 | |
parent | 2641b5f2322c59b5996e5ead655ef5ac41b4a13b (diff) | |
download | beautifulsoup4-a0ce180357b0f33002ee92d37cc3b07e8280da82.tar.gz |
Unicode data cannot have a byte-order mark. Returning early stops a warning from happening.
-rw-r--r-- | bs4/dammit.py | 3 |
1 file changed, 3 insertions, 0 deletions
```diff
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 8e6b347..317ad6d 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -272,6 +272,9 @@ class EncodingDetector:
     def strip_byte_order_mark(cls, data):
         """If a byte-order mark is present, strip it and return the encoding it implies."""
         encoding = None
+        if isinstance(data, unicode):
+            # Unicode data cannot have a byte-order mark.
+            return data, encoding
         if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
                and (data[2:4] != '\x00\x00'):
             encoding = 'utf-16be'
```