test_clipboard.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. # -*- coding: utf-8 -*-
  2. from textwrap import dedent
  3. import numpy as np
  4. from numpy.random import randint
  5. import pytest
  6. from pandas.compat import PY2
  7. import pandas as pd
  8. from pandas import DataFrame, get_option, read_clipboard
  9. from pandas.util import testing as tm
  10. from pandas.util.testing import makeCustomDataframe as mkdf
  11. from pandas.io.clipboard.exceptions import PyperclipException
  12. try:
  13. DataFrame({'A': [1, 2]}).to_clipboard()
  14. _DEPS_INSTALLED = 1
  15. except (PyperclipException, RuntimeError):
  16. _DEPS_INSTALLED = 0
  17. def build_kwargs(sep, excel):
  18. kwargs = {}
  19. if excel != 'default':
  20. kwargs['excel'] = excel
  21. if sep != 'default':
  22. kwargs['sep'] = sep
  23. return kwargs
  24. @pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
  25. 'colwidth', 'mixed', 'float', 'int'])
  26. def df(request):
  27. data_type = request.param
  28. if data_type == 'delims':
  29. return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
  30. 'b': ['hi\'j', 'k\'\'lm']})
  31. elif data_type == 'utf8':
  32. return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
  33. 'b': ['øπ∆˚¬', 'œ∑´®']})
  34. elif data_type == 'string':
  35. return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
  36. c_idx_names=[None], r_idx_names=[None])
  37. elif data_type == 'long':
  38. max_rows = get_option('display.max_rows')
  39. return mkdf(max_rows + 1, 3,
  40. data_gen_f=lambda *args: randint(2),
  41. c_idx_type='s', r_idx_type='i',
  42. c_idx_names=[None], r_idx_names=[None])
  43. elif data_type == 'nonascii':
  44. return pd.DataFrame({'en': 'in English'.split(),
  45. 'es': 'en español'.split()})
  46. elif data_type == 'colwidth':
  47. _cw = get_option('display.max_colwidth') + 1
  48. return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
  49. c_idx_type='s', r_idx_type='i',
  50. c_idx_names=[None], r_idx_names=[None])
  51. elif data_type == 'mixed':
  52. return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
  53. 'b': np.arange(1, 6),
  54. 'c': list('abcde')})
  55. elif data_type == 'float':
  56. return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
  57. c_idx_type='s', r_idx_type='i',
  58. c_idx_names=[None], r_idx_names=[None])
  59. elif data_type == 'int':
  60. return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
  61. c_idx_type='s', r_idx_type='i',
  62. c_idx_names=[None], r_idx_names=[None])
  63. else:
  64. raise ValueError
  65. @pytest.fixture
  66. def mock_clipboard(monkeypatch, request):
  67. """Fixture mocking clipboard IO.
  68. This mocks pandas.io.clipboard.clipboard_get and
  69. pandas.io.clipboard.clipboard_set.
  70. This uses a local dict for storing data. The dictionary
  71. key used is the test ID, available with ``request.node.name``.
  72. This returns the local dictionary, for direct manipulation by
  73. tests.
  74. """
  75. # our local clipboard for tests
  76. _mock_data = {}
  77. def _mock_set(data):
  78. _mock_data[request.node.name] = data
  79. def _mock_get():
  80. return _mock_data[request.node.name]
  81. monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set)
  82. monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get)
  83. yield _mock_data
  84. @pytest.mark.clipboard
  85. def test_mock_clipboard(mock_clipboard):
  86. import pandas.io.clipboard
  87. pandas.io.clipboard.clipboard_set("abc")
  88. assert "abc" in set(mock_clipboard.values())
  89. result = pandas.io.clipboard.clipboard_get()
  90. assert result == "abc"
  91. @pytest.mark.single
  92. @pytest.mark.clipboard
  93. @pytest.mark.skipif(not _DEPS_INSTALLED,
  94. reason="clipboard primitives not installed")
  95. @pytest.mark.usefixtures("mock_clipboard")
  96. class TestClipboard(object):
  97. def check_round_trip_frame(self, data, excel=None, sep=None,
  98. encoding=None):
  99. data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
  100. result = read_clipboard(sep=sep or '\t', index_col=0,
  101. encoding=encoding)
  102. tm.assert_frame_equal(data, result, check_dtype=False)
  103. # Test that default arguments copy as tab delimited
  104. def test_round_trip_frame(self, df):
  105. self.check_round_trip_frame(df)
  106. # Test that explicit delimiters are respected
  107. @pytest.mark.parametrize('sep', ['\t', ',', '|'])
  108. def test_round_trip_frame_sep(self, df, sep):
  109. self.check_round_trip_frame(df, sep=sep)
  110. # Test white space separator
  111. def test_round_trip_frame_string(self, df):
  112. df.to_clipboard(excel=False, sep=None)
  113. result = read_clipboard()
  114. assert df.to_string() == result.to_string()
  115. assert df.shape == result.shape
  116. # Two character separator is not supported in to_clipboard
  117. # Test that multi-character separators are not silently passed
  118. def test_excel_sep_warning(self, df):
  119. with tm.assert_produces_warning():
  120. df.to_clipboard(excel=True, sep=r'\t')
  121. # Separator is ignored when excel=False and should produce a warning
  122. def test_copy_delim_warning(self, df):
  123. with tm.assert_produces_warning():
  124. df.to_clipboard(excel=False, sep='\t')
  125. # Tests that the default behavior of to_clipboard is tab
  126. # delimited and excel="True"
  127. @pytest.mark.parametrize('sep', ['\t', None, 'default'])
  128. @pytest.mark.parametrize('excel', [True, None, 'default'])
  129. def test_clipboard_copy_tabs_default(self, sep, excel, df, request,
  130. mock_clipboard):
  131. kwargs = build_kwargs(sep, excel)
  132. df.to_clipboard(**kwargs)
  133. if PY2:
  134. # to_clipboard copies unicode, to_csv produces bytes. This is
  135. # expected behavior
  136. result = mock_clipboard[request.node.name].encode('utf-8')
  137. expected = df.to_csv(sep='\t')
  138. assert result == expected
  139. else:
  140. assert mock_clipboard[request.node.name] == df.to_csv(sep='\t')
  141. # Tests reading of white space separated tables
  142. @pytest.mark.parametrize('sep', [None, 'default'])
  143. @pytest.mark.parametrize('excel', [False])
  144. def test_clipboard_copy_strings(self, sep, excel, df):
  145. kwargs = build_kwargs(sep, excel)
  146. df.to_clipboard(**kwargs)
  147. result = read_clipboard(sep=r'\s+')
  148. assert result.to_string() == df.to_string()
  149. assert df.shape == result.shape
  150. def test_read_clipboard_infer_excel(self, request,
  151. mock_clipboard):
  152. # gh-19010: avoid warnings
  153. clip_kwargs = dict(engine="python")
  154. text = dedent("""
  155. John James Charlie Mingus
  156. 1 2
  157. 4 Harry Carney
  158. """.strip())
  159. mock_clipboard[request.node.name] = text
  160. df = pd.read_clipboard(**clip_kwargs)
  161. # excel data is parsed correctly
  162. assert df.iloc[1][1] == 'Harry Carney'
  163. # having diff tab counts doesn't trigger it
  164. text = dedent("""
  165. a\t b
  166. 1 2
  167. 3 4
  168. """.strip())
  169. mock_clipboard[request.node.name] = text
  170. res = pd.read_clipboard(**clip_kwargs)
  171. text = dedent("""
  172. a b
  173. 1 2
  174. 3 4
  175. """.strip())
  176. mock_clipboard[request.node.name] = text
  177. exp = pd.read_clipboard(**clip_kwargs)
  178. tm.assert_frame_equal(res, exp)
  179. def test_invalid_encoding(self, df):
  180. # test case for testing invalid encoding
  181. with pytest.raises(ValueError):
  182. df.to_clipboard(encoding='ascii')
  183. with pytest.raises(NotImplementedError):
  184. pd.read_clipboard(encoding='ascii')
  185. @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
  186. def test_round_trip_valid_encodings(self, enc, df):
  187. self.check_round_trip_frame(df, encoding=enc)