test_unicode.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # Copyright (c) 2009, Giampaolo Rodola'. All rights reserved.
  4. # Use of this source code is governed by a BSD-style license that can be
  5. # found in the LICENSE file.
  6. """
  7. Notes about unicode handling in psutil
  8. ======================================
  9. Starting from version 5.3.0 psutil adds unicode support, see:
  10. https://github.com/giampaolo/psutil/issues/1040
  11. The notes below apply to *any* API returning a string such as
  12. process exe(), cwd() or username():
  13. * all strings are encoded by using the OS filesystem encoding
  14. (sys.getfilesystemencoding()) which varies depending on the platform
  15. (e.g. "UTF-8" on macOS, "mbcs" on Win)
  16. * no API call is supposed to crash with UnicodeDecodeError
  17. * instead, in case of badly encoded data returned by the OS, the
  18. following error handlers are used to replace the corrupted characters in
  19. the string:
  20. * Python 3: sys.getfilesystemencodeerrors() (PY 3.6+) or
  21. "surrogateescape" on POSIX and "replace" on Windows
  22. * Python 2: "replace"
  23. * on Python 2 all APIs return bytes (str type), never unicode
  24. * on Python 2, you can go back to unicode by doing:
  25. >>> unicode(p.exe(), sys.getdefaultencoding(), errors="replace")
  26. For a detailed explanation of how psutil handles unicode see #1040.
  27. Tests
  28. =====
  29. List of APIs returning or dealing with a string:
  30. ('not tested' means they are not tested to deal with non-ASCII strings):
  31. * Process.cmdline()
  32. * Process.connections('unix')
  33. * Process.cwd()
  34. * Process.environ()
  35. * Process.exe()
  36. * Process.memory_maps()
  37. * Process.name()
  38. * Process.open_files()
  39. * Process.username() (not tested)
  40. * disk_io_counters() (not tested)
  41. * disk_partitions() (not tested)
  42. * disk_usage(str)
  43. * net_connections('unix')
  44. * net_if_addrs() (not tested)
  45. * net_if_stats() (not tested)
  46. * net_io_counters() (not tested)
  47. * sensors_fans() (not tested)
  48. * sensors_temperatures() (not tested)
  49. * users() (not tested)
  50. * WindowsService.binpath() (not tested)
  51. * WindowsService.description() (not tested)
  52. * WindowsService.display_name() (not tested)
  53. * WindowsService.name() (not tested)
  54. * WindowsService.status() (not tested)
  55. * WindowsService.username() (not tested)
  56. In here we create a unicode path with a funky non-ASCII name and (where
  57. possible) make psutil return it back (e.g. on name(), exe(), open_files(),
  58. etc.) and make sure that:
  59. * psutil never crashes with UnicodeDecodeError
  60. * the returned path matches
  61. """
  62. import os
  63. import shutil
  64. import traceback
  65. import warnings
  66. from contextlib import closing
  67. from psutil import BSD
  68. from psutil import OPENBSD
  69. from psutil import POSIX
  70. from psutil import WINDOWS
  71. from psutil._compat import PY3
  72. from psutil._compat import u
  73. from psutil.tests import APPVEYOR
  74. from psutil.tests import ASCII_FS
  75. from psutil.tests import bind_unix_socket
  76. from psutil.tests import chdir
  77. from psutil.tests import CI_TESTING
  78. from psutil.tests import CIRRUS
  79. from psutil.tests import copyload_shared_lib
  80. from psutil.tests import create_exe
  81. from psutil.tests import get_testfn
  82. from psutil.tests import HAS_CONNECTIONS_UNIX
  83. from psutil.tests import HAS_ENVIRON
  84. from psutil.tests import HAS_MEMORY_MAPS
  85. from psutil.tests import INVALID_UNICODE_SUFFIX
  86. from psutil.tests import PsutilTestCase
  87. from psutil.tests import PYPY
  88. from psutil.tests import safe_mkdir
  89. from psutil.tests import safe_rmpath
  90. from psutil.tests import serialrun
  91. from psutil.tests import skip_on_access_denied
  92. from psutil.tests import spawn_testproc
  93. from psutil.tests import terminate
  94. from psutil.tests import TESTFN_PREFIX
  95. from psutil.tests import UNICODE_SUFFIX
  96. from psutil.tests import unittest
  97. import psutil
  98. if APPVEYOR:
  99. def safe_rmpath(path): # NOQA
  100. # TODO - this is quite random and I'm not sure why it happens,
  101. # nor I can reproduce it locally:
  102. # https://ci.appveyor.com/project/giampaolo/psutil/build/job/
  103. # jiq2cgd6stsbtn60
  104. # safe_rmpath() happens after reap_children() so this is weird
  105. # Perhaps wait_procs() on Windows is broken? Maybe because
  106. # of STILL_ACTIVE?
  107. # https://github.com/giampaolo/psutil/blob/
  108. # 68c7a70728a31d8b8b58f4be6c4c0baa2f449eda/psutil/arch/
  109. # windows/process_info.c#L146
  110. from psutil.tests import safe_rmpath as _rm
  111. try:
  112. return _rm(path)
  113. except WindowsError:
  114. traceback.print_exc()
  115. def try_unicode(suffix):
  116. """Return True if both the fs and the subprocess module can
  117. deal with a unicode file name.
  118. """
  119. if PY3:
  120. return True
  121. sproc = None
  122. testfn = get_testfn(suffix=suffix)
  123. try:
  124. safe_rmpath(testfn)
  125. create_exe(testfn)
  126. sproc = spawn_testproc(cmd=[testfn])
  127. shutil.copyfile(testfn, testfn + '-2')
  128. safe_rmpath(testfn + '-2')
  129. except (UnicodeEncodeError, IOError):
  130. return False
  131. else:
  132. return True
  133. finally:
  134. if sproc is not None:
  135. terminate(sproc)
  136. safe_rmpath(testfn)
  137. # ===================================================================
  138. # FS APIs
  139. # ===================================================================
  140. @serialrun
  141. @unittest.skipIf(ASCII_FS, "ASCII fs")
  142. @unittest.skipIf(PYPY and not PY3, "too much trouble on PYPY2")
  143. class _BaseFSAPIsTests(object):
  144. funky_suffix = None
  145. @classmethod
  146. def setUpClass(cls):
  147. cls.funky_name = get_testfn(suffix=cls.funky_suffix)
  148. create_exe(cls.funky_name)
  149. @classmethod
  150. def tearDownClass(cls):
  151. safe_rmpath(cls.funky_name)
  152. def expect_exact_path_match(self):
  153. raise NotImplementedError("must be implemented in subclass")
  154. # ---
  155. def test_proc_exe(self):
  156. subp = self.spawn_testproc(cmd=[self.funky_name])
  157. p = psutil.Process(subp.pid)
  158. exe = p.exe()
  159. self.assertIsInstance(exe, str)
  160. if self.expect_exact_path_match():
  161. self.assertEqual(os.path.normcase(exe),
  162. os.path.normcase(self.funky_name))
  163. def test_proc_name(self):
  164. subp = self.spawn_testproc(cmd=[self.funky_name])
  165. name = psutil.Process(subp.pid).name()
  166. self.assertIsInstance(name, str)
  167. if self.expect_exact_path_match():
  168. self.assertEqual(name, os.path.basename(self.funky_name))
  169. def test_proc_cmdline(self):
  170. subp = self.spawn_testproc(cmd=[self.funky_name])
  171. p = psutil.Process(subp.pid)
  172. cmdline = p.cmdline()
  173. for part in cmdline:
  174. self.assertIsInstance(part, str)
  175. if self.expect_exact_path_match():
  176. self.assertEqual(cmdline, [self.funky_name])
  177. def test_proc_cwd(self):
  178. dname = self.funky_name + "2"
  179. self.addCleanup(safe_rmpath, dname)
  180. safe_mkdir(dname)
  181. with chdir(dname):
  182. p = psutil.Process()
  183. cwd = p.cwd()
  184. self.assertIsInstance(p.cwd(), str)
  185. if self.expect_exact_path_match():
  186. self.assertEqual(cwd, dname)
  187. @unittest.skipIf(PYPY and WINDOWS, "fails on PYPY + WINDOWS")
  188. def test_proc_open_files(self):
  189. p = psutil.Process()
  190. start = set(p.open_files())
  191. with open(self.funky_name, 'rb'):
  192. new = set(p.open_files())
  193. path = (new - start).pop().path
  194. self.assertIsInstance(path, str)
  195. if BSD and not path:
  196. # XXX - see https://github.com/giampaolo/psutil/issues/595
  197. return self.skipTest("open_files on BSD is broken")
  198. if self.expect_exact_path_match():
  199. self.assertEqual(os.path.normcase(path),
  200. os.path.normcase(self.funky_name))
  201. @unittest.skipIf(not POSIX, "POSIX only")
  202. def test_proc_connections(self):
  203. name = self.get_testfn(suffix=self.funky_suffix)
  204. try:
  205. sock = bind_unix_socket(name)
  206. except UnicodeEncodeError:
  207. if PY3:
  208. raise
  209. else:
  210. raise unittest.SkipTest("not supported")
  211. with closing(sock):
  212. conn = psutil.Process().connections('unix')[0]
  213. self.assertIsInstance(conn.laddr, str)
  214. # AF_UNIX addr not set on OpenBSD
  215. if not OPENBSD and not CIRRUS: # XXX
  216. self.assertEqual(conn.laddr, name)
  217. @unittest.skipIf(not POSIX, "POSIX only")
  218. @unittest.skipIf(not HAS_CONNECTIONS_UNIX, "can't list UNIX sockets")
  219. @skip_on_access_denied()
  220. def test_net_connections(self):
  221. def find_sock(cons):
  222. for conn in cons:
  223. if os.path.basename(conn.laddr).startswith(TESTFN_PREFIX):
  224. return conn
  225. raise ValueError("connection not found")
  226. name = self.get_testfn(suffix=self.funky_suffix)
  227. try:
  228. sock = bind_unix_socket(name)
  229. except UnicodeEncodeError:
  230. if PY3:
  231. raise
  232. else:
  233. raise unittest.SkipTest("not supported")
  234. with closing(sock):
  235. cons = psutil.net_connections(kind='unix')
  236. # AF_UNIX addr not set on OpenBSD
  237. if not OPENBSD:
  238. conn = find_sock(cons)
  239. self.assertIsInstance(conn.laddr, str)
  240. self.assertEqual(conn.laddr, name)
  241. def test_disk_usage(self):
  242. dname = self.funky_name + "2"
  243. self.addCleanup(safe_rmpath, dname)
  244. safe_mkdir(dname)
  245. psutil.disk_usage(dname)
  246. @unittest.skipIf(not HAS_MEMORY_MAPS, "not supported")
  247. @unittest.skipIf(not PY3, "ctypes does not support unicode on PY2")
  248. @unittest.skipIf(PYPY, "unstable on PYPY")
  249. def test_memory_maps(self):
  250. # XXX: on Python 2, using ctypes.CDLL with a unicode path
  251. # opens a message box which blocks the test run.
  252. with copyload_shared_lib(suffix=self.funky_suffix) as funky_path:
  253. def normpath(p):
  254. return os.path.realpath(os.path.normcase(p))
  255. libpaths = [normpath(x.path)
  256. for x in psutil.Process().memory_maps()]
  257. # ...just to have a clearer msg in case of failure
  258. libpaths = [x for x in libpaths if TESTFN_PREFIX in x]
  259. self.assertIn(normpath(funky_path), libpaths)
  260. for path in libpaths:
  261. self.assertIsInstance(path, str)
  262. # https://travis-ci.org/giampaolo/psutil/jobs/440073249
  263. # @unittest.skipIf(PYPY and TRAVIS, "unreliable on PYPY + TRAVIS")
  264. # @unittest.skipIf(MACOS and TRAVIS, "unreliable on TRAVIS") # TODO
  265. @unittest.skipIf(not try_unicode(UNICODE_SUFFIX),
  266. "can't deal with unicode str")
  267. class TestFSAPIs(_BaseFSAPIsTests, PsutilTestCase):
  268. """Test FS APIs with a funky, valid, UTF8 path name."""
  269. funky_suffix = UNICODE_SUFFIX
  270. def expect_exact_path_match(self):
  271. # Do not expect psutil to correctly handle unicode paths on
  272. # Python 2 if os.listdir() is not able either.
  273. here = '.' if isinstance(self.funky_name, str) else u('.')
  274. with warnings.catch_warnings():
  275. warnings.simplefilter("ignore")
  276. return self.funky_name in os.listdir(here)
  277. @unittest.skipIf(CI_TESTING, "unreliable on CI")
  278. @unittest.skipIf(not try_unicode(INVALID_UNICODE_SUFFIX),
  279. "can't deal with invalid unicode str")
  280. class TestFSAPIsWithInvalidPath(_BaseFSAPIsTests, PsutilTestCase):
  281. """Test FS APIs with a funky, invalid path name."""
  282. funky_suffix = INVALID_UNICODE_SUFFIX
  283. @classmethod
  284. def expect_exact_path_match(cls):
  285. # Invalid unicode names are supposed to work on Python 2.
  286. return True
  287. # ===================================================================
  288. # Non fs APIs
  289. # ===================================================================
  290. class TestNonFSAPIS(PsutilTestCase):
  291. """Unicode tests for non fs-related APIs."""
  292. @unittest.skipIf(not HAS_ENVIRON, "not supported")
  293. @unittest.skipIf(PYPY and WINDOWS, "segfaults on PYPY + WINDOWS")
  294. def test_proc_environ(self):
  295. # Note: differently from others, this test does not deal
  296. # with fs paths. On Python 2 subprocess module is broken as
  297. # it's not able to handle with non-ASCII env vars, so
  298. # we use "è", which is part of the extended ASCII table
  299. # (unicode point <= 255).
  300. env = os.environ.copy()
  301. funky_str = UNICODE_SUFFIX if PY3 else 'è'
  302. env['FUNNY_ARG'] = funky_str
  303. sproc = self.spawn_testproc(env=env)
  304. p = psutil.Process(sproc.pid)
  305. env = p.environ()
  306. for k, v in env.items():
  307. self.assertIsInstance(k, str)
  308. self.assertIsInstance(v, str)
  309. self.assertEqual(env['FUNNY_ARG'], funky_str)
if __name__ == '__main__':
    # When executed as a script, hand this file's path over to
    # psutil's test runner helper.
    from psutil.tests.runner import run_from_name
    run_from_name(__file__)