diff options
author | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-04 22:35:15 +0200 |
---|---|---|
committer | Giampaolo Rodola <g.rodola@gmail.com> | 2017-05-04 22:35:15 +0200 |
commit | 982f255b4a14c96482ad1ce455b0f2fb0275e144 (patch) | |
tree | edd6c91074d8749d8139ee684fa0c2ff06bc4246 | |
parent | a1f2a0970fcf353e01fe5abd5c3a4e7a50037648 (diff) | |
download | psutil-982f255b4a14c96482ad1ce455b0f2fb0275e144.tar.gz |
#1040: on py 3.6 use sys.getfilesystemencodeerrors() to determine the default error handler instead of guessing it
-rw-r--r-- | docs/index.rst | 8 | ||||
-rw-r--r-- | psutil/_common.py | 13 | ||||
-rw-r--r-- | psutil/_pslinux.py | 13 | ||||
-rw-r--r-- | psutil/_pswindows.py | 12 |
4 files changed, 31 insertions, 15 deletions
diff --git a/docs/index.rst b/docs/index.rst index 02f93570..350af93f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2250,11 +2250,13 @@ The notes below apply to *any* API returning a string such as :meth:`Process.exe` or :meth:`Process.cwd`, including non-filesystem related methods such as :meth:`Process.username` or :meth:`WindowsService.description`: -* all strings are encoded by using the OS filesystem encoding which varies - depending on the platform (e.g. UTF-8 on Linux, mbcs on Win) +* all strings are encoded by using the OS filesystem encoding + (``sys.getfilesystemencoding()``) which varies depending on the platform + (e.g. "UTF-8" on OSX, "mbcs" on Win) * no API call is supposed to crash with ``UnicodeDecodeError`` * instead, in case of badly encoded data returned by the OS, the following error handlers are used to replace the corrupted characters in the string: - * Python 3: ``"surrogatescape"`` on POSIX and ``"replace"`` on Windows + * Python 3: ``sys.getfilesystemencodeerrors()`` (PY 3.6+) or + ``"surrogatescape"`` on POSIX and ``"replace"`` on Windows * Python 2: ``"replace"`` * on Python 2 all APIs return bytes (``str`` type), never ``unicode`` * on Python 2, you can go back to ``unicode`` by doing: diff --git a/psutil/_common.py b/psutil/_common.py index 54cb1ff5..e6ba9dc3 100644 --- a/psutil/_common.py +++ b/psutil/_common.py @@ -27,6 +27,8 @@ try: except ImportError: AF_UNIX = None +from psutil._compat import PY3 + if sys.version_info >= (3, 4): import enum else: @@ -132,6 +134,17 @@ else: globals().update(BatteryTime.__members__) +# --- others + +ENCODING = sys.getfilesystemencoding() +if not PY3: + ENCODING_ERRS = "replace" +else: + try: + ENCODING_ERRS = sys.getfilesystemencodeerrors() # py 3.6 + except AttributeError: + ENCODING_ERRS = "surrogateescape" if POSIX else "replace" + # =================================================================== # --- namedtuples diff --git a/psutil/_pslinux.py b/psutil/_pslinux.py index 
1a890a87..7a03940a 100644 --- a/psutil/_pslinux.py +++ b/psutil/_pslinux.py @@ -25,13 +25,15 @@ from . import _common from . import _psposix from . import _psutil_linux as cext from . import _psutil_posix as cext_posix +from ._common import ENCODING +from ._common import ENCODING_ERRS from ._common import isfile_strict from ._common import memoize from ._common import memoize_when_activated -from ._common import parse_environ_block from ._common import NIC_DUPLEX_FULL from ._common import NIC_DUPLEX_HALF from ._common import NIC_DUPLEX_UNKNOWN +from ._common import parse_environ_block from ._common import path_exists_strict from ._common import supports_ipv6 from ._common import usage_percent @@ -84,9 +86,6 @@ BOOT_TIME = None # set later BIGGER_FILE_BUFFERING = -1 if PY3 else 8192 LITTLE_ENDIAN = sys.byteorder == 'little' SECTOR_SIZE_FALLBACK = 512 -if PY3: - FS_ENCODING = sys.getfilesystemencoding() - ENCODING_ERRORS_HANDLER = 'surrogateescape' if enum is None: AF_LINK = socket.AF_PACKET else: @@ -200,14 +199,14 @@ def open_text(fname, **kwargs): # See: # https://github.com/giampaolo/psutil/issues/675 # https://github.com/giampaolo/psutil/pull/733 - kwargs.setdefault('encoding', FS_ENCODING) - kwargs.setdefault('errors', ENCODING_ERRORS_HANDLER) + kwargs.setdefault('encoding', ENCODING) + kwargs.setdefault('errors', ENCODING_ERRS) return open(fname, "rt", **kwargs) if PY3: def decode(s): - return s.decode(encoding=FS_ENCODING, errors=ENCODING_ERRORS_HANDLER) + return s.decode(encoding=ENCODING, errors=ENCODING_ERRS) else: def decode(s): return s diff --git a/psutil/_pswindows.py b/psutil/_pswindows.py index dd7b07a1..9f55194f 100644 --- a/psutil/_pswindows.py +++ b/psutil/_pswindows.py @@ -32,11 +32,13 @@ except ImportError as err: raise from ._common import conn_tmap +from ._common import ENCODING +from ._common import ENCODING_ERRS from ._common import isfile_strict +from ._common import memoize_when_activated from ._common import parse_environ_block from 
._common import sockfam_to_enum from ._common import socktype_to_enum -from ._common import memoize_when_activated from ._common import usage_percent from ._compat import long from ._compat import lru_cache @@ -71,8 +73,6 @@ __extra__all__ = [ # --- globals # ===================================================================== -FS_ENCODING = sys.getfilesystemencoding() -PY2_ENCODING_ERRS = "replace" CONN_DELETE_TCB = "DELETE_TCB" WAIT_TIMEOUT = 0x00000102 # 258 in decimal ACCESS_DENIED_SET = frozenset([errno.EPERM, errno.EACCES, @@ -198,7 +198,7 @@ def py2_strencode(s): if isinstance(s, str): return s else: - return s.encode(FS_ENCODING, errors=PY2_ENCODING_ERRS) + return s.encode(ENCODING, errors=ENCODING_ERRS) # ===================================================================== @@ -240,7 +240,9 @@ disk_io_counters = cext.disk_io_counters def disk_usage(path): """Return disk usage associated with path.""" if PY3 and isinstance(path, bytes): - path = path.decode(FS_ENCODING) + # XXX: do we want to use "strict"? Probably yes, in order + # to fail immediately. After all we are accepting input here... + path = path.decode(ENCODING, errors="strict") total, free = cext.disk_usage(path) used = total - free percent = usage_percent(used, total, _round=1) |