summaryrefslogtreecommitdiff
path: root/bs4/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/__init__.py')
-rw-r--r--bs4/__init__.py11
1 files changed, 10 insertions, 1 deletions
diff --git a/bs4/__init__.py b/bs4/__init__.py
index bc611c9..308428a 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -236,7 +236,16 @@ class BeautifulSoup(Tag):
self.builder.soup = None
def __copy__(self):
- return type(self)(self.encode(), builder=self.builder)
+ copy = type(self)(
+ self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
+ )
+
+ # Although we encoded the tree to UTF-8, that may not have
+ # been the encoding of the original markup. Set the copy's
+ # .original_encoding to reflect the original object's
+ # .original_encoding.
+ copy.original_encoding = self.original_encoding
+ return copy
def __getstate__(self):
# Frequently a tree builder can't be pickled.