summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rob Reilink <rob@reilink.net> 2018-12-31 12:31:53 +0100
committer Rob Reilink <rob@reilink.net> 2018-12-31 12:31:53 +0100
commit bd76ca8ac5dd89f23353435adb704a0fcba1c49c (patch)
tree 197ac48fe5b3f50acffe27c5c034ea0587a0bb89
parent 73a573a1b256fc73598bd4d93756bb618fd815fa (diff)
download ply-bd76ca8ac5dd89f23353435adb704a0fcba1c49c.tar.gz
Fixes issue #196 and adds a test case for it.
Additionally, the reading of the #included file is moved into a separate method, read_include_file(), to allow for easier customization of the behaviour, e.g. to restrict the paths from which files may be included.
-rw-r--r-- ply/cpp.py 16
-rw-r--r-- test/test_cpp_nonascii.c 2
-rw-r--r-- test/testcpp.py 18
3 files changed, 34 insertions, 2 deletions
diff --git a/ply/cpp.py b/ply/cpp.py
index 8565a6b..50a44a1 100644
--- a/ply/cpp.py
+++ b/ply/cpp.py
@@ -824,8 +824,7 @@ class Preprocessor(object):
for p in path:
iname = os.path.join(p,filename)
try:
- with open(iname) as f:
- data = f.read()
+ data = self.read_include_file(iname)
dname = os.path.dirname(iname)
if dname:
self.temp_path.insert(0,dname)
@@ -840,6 +839,19 @@ class Preprocessor(object):
print("Couldn't find '%s'" % filename)
# ----------------------------------------------------------------------
+ # read_include_file()
+ #
+ # Reads a source file for inclusion using #include.
+ # Can be overridden to e.g. customize the encoding, limit access to
+ # certain paths on the filesystem, or provide the contents of system
+ # include files.
+ # ----------------------------------------------------------------------
+
+ def read_include_file(self, filepath):
+ with open(filepath, 'r', encoding='utf-8', errors='surrogateescape') as file:
+ return file.read()
+
+ # ----------------------------------------------------------------------
# define()
#
# Define a new macro
diff --git a/test/test_cpp_nonascii.c b/test/test_cpp_nonascii.c
new file mode 100644
index 0000000..3e97d81
--- /dev/null
+++ b/test/test_cpp_nonascii.c
@@ -0,0 +1,2 @@
+/* ë */
+#define x 1
\ No newline at end of file
diff --git a/test/testcpp.py b/test/testcpp.py
index 7e281a3..dbfb3e4 100644
--- a/test/testcpp.py
+++ b/test/testcpp.py
@@ -4,6 +4,7 @@ from multiprocessing import Process, Queue
from six.moves.queue import Empty
import sys
+import locale
if ".." not in sys.path:
sys.path.insert(0, "..")
@@ -132,4 +133,21 @@ a;
"""
)
+ def test_include_nonascii(self):
+ # Issue #196: #included files used to be read using the current locale's
+ # preferred encoding. If a #included file contains non-ASCII characters
+ # while that encoding is e.g. US-ASCII, this causes an error.
+ locale.setlocale(locale.LC_ALL, 'C')
+ self.__test_preprocessing("""\
+#include "test_cpp_nonascii.c"
+x;
+
+"""
+ , """\
+
+
+1;
+"""
+ )
+
main()