summaryrefslogtreecommitdiff
path: root/psutil/tests/test_unicode.py
blob: 81a28807c3a6142ab753e1bc2029e43bbbcc355a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2009, Giampaolo Rodola'. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Notes about unicode handling in psutil
======================================

Starting from version 5.3.0 psutil adds unicode support, see:
https://github.com/giampaolo/psutil/issues/1040
The notes below apply to *any* API returning a string such as
process exe(), cwd() or username():

* all strings are encoded by using the OS filesystem encoding
  (sys.getfilesystemencoding()) which varies depending on the platform
  (e.g. "UTF-8" on macOS, "mbcs" on Win)
* no API call is supposed to crash with UnicodeDecodeError
* instead, in case of badly encoded data returned by the OS, the
  following error handlers are used to replace the corrupted characters in
  the string:
    * Python 3: sys.getfilesystemencodeerrors() (PY 3.6+) or
      "surrogatescape" on POSIX and "replace" on Windows
    * Python 2: "replace"
* on Python 2 all APIs return bytes (str type), never unicode
* on Python 2, you can go back to unicode by doing:

    >>> unicode(p.exe(), sys.getdefaultencoding(), errors="replace")

For a detailed explanation of how psutil handles unicode see #1040.

Tests
=====

List of APIs returning or dealing with a string:
('not tested' means they are not tested to deal with non-ASCII strings):

* Process.cmdline()
* Process.connections('unix')
* Process.cwd()
* Process.environ()
* Process.exe()
* Process.memory_maps()
* Process.name()
* Process.open_files()
* Process.username()             (not tested)

* disk_io_counters()             (not tested)
* disk_partitions()              (not tested)
* disk_usage(str)
* net_connections('unix')
* net_if_addrs()                 (not tested)
* net_if_stats()                 (not tested)
* net_io_counters()              (not tested)
* sensors_fans()                 (not tested)
* sensors_temperatures()         (not tested)
* users()                        (not tested)

* WindowsService.binpath()       (not tested)
* WindowsService.description()   (not tested)
* WindowsService.display_name()  (not tested)
* WindowsService.name()          (not tested)
* WindowsService.status()        (not tested)
* WindowsService.username()      (not tested)

In here we create a unicode path with a funky non-ASCII name and (where
possible) make psutil return it back (e.g. on name(), exe(), open_files(),
etc.) and make sure that:

* psutil never crashes with UnicodeDecodeError
* the returned path matches
"""

import os
import traceback
import warnings
from contextlib import closing

from psutil import BSD
from psutil import MACOS
from psutil import OPENBSD
from psutil import POSIX
from psutil import WINDOWS
from psutil._compat import PY3
from psutil._compat import u
from psutil.tests import APPVEYOR
from psutil.tests import CIRRUS
from psutil.tests import ASCII_FS
from psutil.tests import bind_unix_socket
from psutil.tests import chdir
from psutil.tests import copyload_shared_lib
from psutil.tests import create_exe
from psutil.tests import get_test_subprocess
from psutil.tests import HAS_CONNECTIONS_UNIX
from psutil.tests import HAS_ENVIRON
from psutil.tests import HAS_MEMORY_MAPS
from psutil.tests import PYPY
from psutil.tests import reap_children
from psutil.tests import safe_mkdir
from psutil.tests import safe_rmpath as _safe_rmpath
from psutil.tests import skip_on_access_denied
from psutil.tests import TESTFILE_PREFIX
from psutil.tests import TESTFN
from psutil.tests import TESTFN_UNICODE
from psutil.tests import TRAVIS
from psutil.tests import unittest
from psutil.tests import unix_socket_path
import psutil


def safe_rmpath(path):
    if APPVEYOR:
        # TODO - this is quite random and I'm not sure why it happens,
        # nor I can reproduce it locally:
        # https://ci.appveyor.com/project/giampaolo/psutil/build/job/
        #     jiq2cgd6stsbtn60
        # safe_rmpath() happens after reap_children() so this is weird
        # Perhaps wait_procs() on Windows is broken? Maybe because
        # of STILL_ACTIVE?
        # https://github.com/giampaolo/psutil/blob/
        #     68c7a70728a31d8b8b58f4be6c4c0baa2f449eda/psutil/arch/
        #     windows/process_info.c#L146
        try:
            return _safe_rmpath(path)
        except WindowsError:
            traceback.print_exc()
    else:
        return _safe_rmpath(path)


def subprocess_supports_unicode(name):
    """Return True if both the fs and the subprocess module can
    deal with a unicode file name.
    """
    if PY3:
        return True
    try:
        safe_rmpath(name)
        create_exe(name)
        get_test_subprocess(cmd=[name])
    except UnicodeEncodeError:
        return False
    else:
        return True
    finally:
        reap_children()


# An invalid unicode string.
if PY3:
    INVALID_NAME = (TESTFN.encode('utf8') + b"f\xc0\x80").decode(
        'utf8', 'surrogateescape')
else:
    INVALID_NAME = TESTFN + "f\xc0\x80"


# ===================================================================
# FS APIs
# ===================================================================


class _BaseFSAPIsTests(object):
    funky_name = None

    @classmethod
    def setUpClass(cls):
        safe_rmpath(cls.funky_name)
        create_exe(cls.funky_name)

    @classmethod
    def tearDownClass(cls):
        reap_children()
        safe_rmpath(cls.funky_name)

    def tearDown(self):
        reap_children()

    def expect_exact_path_match(self):
        raise NotImplementedError("must be implemented in subclass")

    def test_proc_exe(self):
        subp = get_test_subprocess(cmd=[self.funky_name])
        p = psutil.Process(subp.pid)
        exe = p.exe()
        self.assertIsInstance(exe, str)
        if self.expect_exact_path_match():
            self.assertEqual(os.path.normcase(exe),
                             os.path.normcase(self.funky_name))

    def test_proc_name(self):
        subp = get_test_subprocess(cmd=[self.funky_name])
        name = psutil.Process(subp.pid).name()
        self.assertIsInstance(name, str)
        if self.expect_exact_path_match():
            self.assertEqual(name, os.path.basename(self.funky_name))

    def test_proc_cmdline(self):
        subp = get_test_subprocess(cmd=[self.funky_name])
        p = psutil.Process(subp.pid)
        cmdline = p.cmdline()
        for part in cmdline:
            self.assertIsInstance(part, str)
        if self.expect_exact_path_match():
            self.assertEqual(cmdline, [self.funky_name])

    def test_proc_cwd(self):
        dname = self.funky_name + "2"
        self.addCleanup(safe_rmpath, dname)
        safe_mkdir(dname)
        with chdir(dname):
            p = psutil.Process()
            cwd = p.cwd()
        self.assertIsInstance(p.cwd(), str)
        if self.expect_exact_path_match():
            self.assertEqual(cwd, dname)

    @unittest.skipIf(PYPY and WINDOWS, "fails on PYPY + WINDOWS")
    def test_proc_open_files(self):
        p = psutil.Process()
        start = set(p.open_files())
        with open(self.funky_name, 'rb'):
            new = set(p.open_files())
        path = (new - start).pop().path
        self.assertIsInstance(path, str)
        if BSD and not path:
            # XXX - see https://github.com/giampaolo/psutil/issues/595
            return self.skipTest("open_files on BSD is broken")
        if self.expect_exact_path_match():
            self.assertEqual(os.path.normcase(path),
                             os.path.normcase(self.funky_name))

    @unittest.skipIf(not POSIX, "POSIX only")
    def test_proc_connections(self):
        suffix = os.path.basename(self.funky_name)
        with unix_socket_path(suffix=suffix) as name:
            try:
                sock = bind_unix_socket(name)
            except UnicodeEncodeError:
                if PY3:
                    raise
                else:
                    raise unittest.SkipTest("not supported")
            with closing(sock):
                conn = psutil.Process().connections('unix')[0]
                self.assertIsInstance(conn.laddr, str)
                # AF_UNIX addr not set on OpenBSD
                if not OPENBSD and not CIRRUS:  # XXX
                    self.assertEqual(conn.laddr, name)

    @unittest.skipIf(not POSIX, "POSIX only")
    @unittest.skipIf(not HAS_CONNECTIONS_UNIX, "can't list UNIX sockets")
    @skip_on_access_denied()
    def test_net_connections(self):
        def find_sock(cons):
            for conn in cons:
                if os.path.basename(conn.laddr).startswith(TESTFILE_PREFIX):
                    return conn
            raise ValueError("connection not found")

        suffix = os.path.basename(self.funky_name)
        with unix_socket_path(suffix=suffix) as name:
            try:
                sock = bind_unix_socket(name)
            except UnicodeEncodeError:
                if PY3:
                    raise
                else:
                    raise unittest.SkipTest("not supported")
            with closing(sock):
                cons = psutil.net_connections(kind='unix')
                # AF_UNIX addr not set on OpenBSD
                if not OPENBSD:
                    conn = find_sock(cons)
                    self.assertIsInstance(conn.laddr, str)
                    self.assertEqual(conn.laddr, name)

    def test_disk_usage(self):
        dname = self.funky_name + "2"
        self.addCleanup(safe_rmpath, dname)
        safe_mkdir(dname)
        psutil.disk_usage(dname)

    @unittest.skipIf(not HAS_MEMORY_MAPS, "not supported")
    @unittest.skipIf(not PY3, "ctypes does not support unicode on PY2")
    @unittest.skipIf(PYPY and WINDOWS,
                     "copyload_shared_lib() unsupported on PYPY + WINDOWS")
    def test_memory_maps(self):
        # XXX: on Python 2, using ctypes.CDLL with a unicode path
        # opens a message box which blocks the test run.
        with copyload_shared_lib(dst_prefix=self.funky_name) as funky_path:
            def normpath(p):
                return os.path.realpath(os.path.normcase(p))
            libpaths = [normpath(x.path)
                        for x in psutil.Process().memory_maps()]
            # ...just to have a clearer msg in case of failure
            libpaths = [x for x in libpaths if TESTFILE_PREFIX in x]
            self.assertIn(normpath(funky_path), libpaths)
            for path in libpaths:
                self.assertIsInstance(path, str)


# https://travis-ci.org/giampaolo/psutil/jobs/440073249
@unittest.skipIf(PYPY and TRAVIS, "unreliable on PYPY + TRAVIS")
@unittest.skipIf(MACOS and TRAVIS, "unreliable on TRAVIS")  # TODO
@unittest.skipIf(ASCII_FS, "ASCII fs")
@unittest.skipIf(not subprocess_supports_unicode(TESTFN_UNICODE),
                 "subprocess can't deal with unicode")
class TestFSAPIs(_BaseFSAPIsTests, unittest.TestCase):
    """Test FS APIs with a funky, valid, UTF8 path name."""
    funky_name = TESTFN_UNICODE

    @classmethod
    def expect_exact_path_match(cls):
        # Do not expect psutil to correctly handle unicode paths on
        # Python 2 if os.listdir() is not able either.
        here = '.' if isinstance(cls.funky_name, str) else u('.')
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return cls.funky_name in os.listdir(here)


@unittest.skipIf(PYPY and TRAVIS, "unreliable on PYPY + TRAVIS")
@unittest.skipIf(MACOS and TRAVIS, "unreliable on TRAVIS")  # TODO
@unittest.skipIf(PYPY, "unreliable on PYPY")
@unittest.skipIf(not subprocess_supports_unicode(INVALID_NAME),
                 "subprocess can't deal with invalid unicode")
class TestFSAPIsWithInvalidPath(_BaseFSAPIsTests, unittest.TestCase):
    """Test FS APIs with a funky, invalid path name."""
    funky_name = INVALID_NAME

    @classmethod
    def expect_exact_path_match(cls):
        # Invalid unicode names are supposed to work on Python 2.
        return True


# ===================================================================
# Non fs APIs
# ===================================================================


class TestNonFSAPIS(unittest.TestCase):
    """Unicode tests for non fs-related APIs."""

    def tearDown(self):
        reap_children()

    @unittest.skipIf(not HAS_ENVIRON, "not supported")
    @unittest.skipIf(PYPY and WINDOWS, "segfaults on PYPY + WINDOWS")
    def test_proc_environ(self):
        # Note: differently from others, this test does not deal
        # with fs paths. On Python 2 subprocess module is broken as
        # it's not able to handle with non-ASCII env vars, so
        # we use "è", which is part of the extended ASCII table
        # (unicode point <= 255).
        env = os.environ.copy()
        funky_str = TESTFN_UNICODE if PY3 else 'è'
        env['FUNNY_ARG'] = funky_str
        sproc = get_test_subprocess(env=env)
        p = psutil.Process(sproc.pid)
        env = p.environ()
        for k, v in env.items():
            self.assertIsInstance(k, str)
            self.assertIsInstance(v, str)
        self.assertEqual(env['FUNNY_ARG'], funky_str)


if __name__ == '__main__':
    from psutil.tests.runner import run
    run(__file__)