summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiampaolo Rodola <g.rodola@gmail.com>2017-05-04 22:35:15 +0200
committerGiampaolo Rodola <g.rodola@gmail.com>2017-05-04 22:35:15 +0200
commit982f255b4a14c96482ad1ce455b0f2fb0275e144 (patch)
treeedd6c91074d8749d8139ee684fa0c2ff06bc4246
parenta1f2a0970fcf353e01fe5abd5c3a4e7a50037648 (diff)
downloadpsutil-982f255b4a14c96482ad1ce455b0f2fb0275e144.tar.gz
#1040: on py 3.6 use sys.getfilesystemencodeerrors() to determine the default error handler instead of guessing it
-rw-r--r--docs/index.rst8
-rw-r--r--psutil/_common.py13
-rw-r--r--psutil/_pslinux.py13
-rw-r--r--psutil/_pswindows.py12
4 files changed, 31 insertions, 15 deletions
diff --git a/docs/index.rst b/docs/index.rst
index 02f93570..350af93f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -2250,11 +2250,13 @@ The notes below apply to *any* API returning a string such as
:meth:`Process.exe` or :meth:`Process.cwd`, including non-filesystem related
methods such as :meth:`Process.username` or :meth:`WindowsService.description`:
-* all strings are encoded by using the OS filesystem encoding which varies
- depending on the platform (e.g. UTF-8 on Linux, mbcs on Win)
+* all strings are encoded by using the OS filesystem encoding
+ (``sys.getfilesystemencoding()``) which varies depending on the platform
+ (e.g. "UTF-8" on OSX, "mbcs" on Win)
* no API call is supposed to crash with ``UnicodeDecodeError``
* instead, in case of badly encoded data returned by the OS, the following error handlers are used to replace the corrupted characters in the string:
- * Python 3: ``"surrogatescape"`` on POSIX and ``"replace"`` on Windows
+ * Python 3: ``sys.getfilesystemencodeerrors()`` (PY 3.6+) or
+ ``"surrogateescape"`` on POSIX and ``"replace"`` on Windows
* Python 2: ``"replace"``
* on Python 2 all APIs return bytes (``str`` type), never ``unicode``
* on Python 2, you can go back to ``unicode`` by doing:
diff --git a/psutil/_common.py b/psutil/_common.py
index 54cb1ff5..e6ba9dc3 100644
--- a/psutil/_common.py
+++ b/psutil/_common.py
@@ -27,6 +27,8 @@ try:
except ImportError:
AF_UNIX = None
+from psutil._compat import PY3
+
if sys.version_info >= (3, 4):
import enum
else:
@@ -132,6 +134,17 @@ else:
globals().update(BatteryTime.__members__)
+# --- others
+
+ENCODING = sys.getfilesystemencoding()
+if not PY3:
+ ENCODING_ERRS = "replace"
+else:
+ try:
+ ENCODING_ERRS = sys.getfilesystemencodeerrors() # py 3.6
+ except AttributeError:
+ ENCODING_ERRS = "surrogateescape" if POSIX else "replace"
+
# ===================================================================
# --- namedtuples
diff --git a/psutil/_pslinux.py b/psutil/_pslinux.py
index 1a890a87..7a03940a 100644
--- a/psutil/_pslinux.py
+++ b/psutil/_pslinux.py
@@ -25,13 +25,15 @@ from . import _common
from . import _psposix
from . import _psutil_linux as cext
from . import _psutil_posix as cext_posix
+from ._common import ENCODING
+from ._common import ENCODING_ERRS
from ._common import isfile_strict
from ._common import memoize
from ._common import memoize_when_activated
-from ._common import parse_environ_block
from ._common import NIC_DUPLEX_FULL
from ._common import NIC_DUPLEX_HALF
from ._common import NIC_DUPLEX_UNKNOWN
+from ._common import parse_environ_block
from ._common import path_exists_strict
from ._common import supports_ipv6
from ._common import usage_percent
@@ -84,9 +86,6 @@ BOOT_TIME = None # set later
BIGGER_FILE_BUFFERING = -1 if PY3 else 8192
LITTLE_ENDIAN = sys.byteorder == 'little'
SECTOR_SIZE_FALLBACK = 512
-if PY3:
- FS_ENCODING = sys.getfilesystemencoding()
- ENCODING_ERRORS_HANDLER = 'surrogateescape'
if enum is None:
AF_LINK = socket.AF_PACKET
else:
@@ -200,14 +199,14 @@ def open_text(fname, **kwargs):
# See:
# https://github.com/giampaolo/psutil/issues/675
# https://github.com/giampaolo/psutil/pull/733
- kwargs.setdefault('encoding', FS_ENCODING)
- kwargs.setdefault('errors', ENCODING_ERRORS_HANDLER)
+ kwargs.setdefault('encoding', ENCODING)
+ kwargs.setdefault('errors', ENCODING_ERRS)
return open(fname, "rt", **kwargs)
if PY3:
def decode(s):
- return s.decode(encoding=FS_ENCODING, errors=ENCODING_ERRORS_HANDLER)
+ return s.decode(encoding=ENCODING, errors=ENCODING_ERRS)
else:
def decode(s):
return s
diff --git a/psutil/_pswindows.py b/psutil/_pswindows.py
index dd7b07a1..9f55194f 100644
--- a/psutil/_pswindows.py
+++ b/psutil/_pswindows.py
@@ -32,11 +32,13 @@ except ImportError as err:
raise
from ._common import conn_tmap
+from ._common import ENCODING
+from ._common import ENCODING_ERRS
from ._common import isfile_strict
+from ._common import memoize_when_activated
from ._common import parse_environ_block
from ._common import sockfam_to_enum
from ._common import socktype_to_enum
-from ._common import memoize_when_activated
from ._common import usage_percent
from ._compat import long
from ._compat import lru_cache
@@ -71,8 +73,6 @@ __extra__all__ = [
# --- globals
# =====================================================================
-FS_ENCODING = sys.getfilesystemencoding()
-PY2_ENCODING_ERRS = "replace"
CONN_DELETE_TCB = "DELETE_TCB"
WAIT_TIMEOUT = 0x00000102 # 258 in decimal
ACCESS_DENIED_SET = frozenset([errno.EPERM, errno.EACCES,
@@ -198,7 +198,7 @@ def py2_strencode(s):
if isinstance(s, str):
return s
else:
- return s.encode(FS_ENCODING, errors=PY2_ENCODING_ERRS)
+ return s.encode(ENCODING, errors=ENCODING_ERRS)
# =====================================================================
@@ -240,7 +240,9 @@ disk_io_counters = cext.disk_io_counters
def disk_usage(path):
"""Return disk usage associated with path."""
if PY3 and isinstance(path, bytes):
- path = path.decode(FS_ENCODING)
+ # XXX: do we want to use "strict"? Probably yes, in order
+ # to fail immediately. After all we are accepting input here...
+ path = path.decode(ENCODING, errors="strict")
total, free = cext.disk_usage(path)
used = total - free
percent = usage_percent(used, total, _round=1)