# -*- coding: utf-8 -*- from __future__ import print_function import numpy as np import pytest from pandas.compat import PY36, lrange, range from pandas import DataFrame, Index, MultiIndex, Series from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal # Column add, remove, delete. class TestDataFrameMutateColumns(TestData): def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) original = df.copy() result = df.assign(C=df.B / df.A) expected = df.copy() expected['C'] = [4, 2.5, 2] assert_frame_equal(result, expected) # lambda syntax result = df.assign(C=lambda x: x.B / x.A) assert_frame_equal(result, expected) # original is unmodified assert_frame_equal(df, original) # Non-Series array-like result = df.assign(C=[4, 2.5, 2]) assert_frame_equal(result, expected) # original is unmodified assert_frame_equal(df, original) result = df.assign(B=df.B / df.A) expected = expected.drop('B', axis=1).rename(columns={'C': 'B'}) assert_frame_equal(result, expected) # overwrite result = df.assign(A=df.A + df.B) expected = df.copy() expected['A'] = [5, 7, 9] assert_frame_equal(result, expected) # lambda result = df.assign(A=lambda x: x.A + x.B) assert_frame_equal(result, expected) def test_assign_multiple(self): df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B']) result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) expected = DataFrame([[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list('ABCDE')) assert_frame_equal(result, expected) def test_assign_order(self): # GH 9818 df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) if PY36: expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list('ABDC')) else: expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B) expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list('ABCD')) assert_frame_equal(result, expected) def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) # non-keyword argument with pytest.raises(TypeError): df.assign(lambda x: x.A) with pytest.raises(AttributeError): df.assign(C=df.A, D=df.A + df.C) @pytest.mark.skipif(PY36, reason="""Issue #14207: valid for python 3.6 and above""") def test_assign_dependent_old_python(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) # Key C does not exist at definition time of df with pytest.raises(KeyError): df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) with pytest.raises(KeyError): df.assign(C=df.A, D=lambda x: x['A'] + x['C']) @pytest.mark.skipif(not PY36, reason="""Issue #14207: not valid for python 3.5 and below""") def test_assign_dependent(self): df = DataFrame({'A': [1, 2], 'B': [3, 4]}) result = df.assign(C=df.A, D=lambda x: x['A'] + x['C']) expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list('ABCD')) assert_frame_equal(result, expected) def test_insert_error_msmgs(self): # GH 7432 df = DataFrame({'foo': ['a', 'b', 'c'], 'bar': [ 1, 2, 3], 'baz': ['d', 'e', 'f']}).set_index('foo') s = DataFrame({'foo': ['a', 'b', 'c', 'a'], 'fiz': [ 'g', 'h', 'i', 'j']}).set_index('foo') msg = 'cannot reindex from a duplicate axis' with pytest.raises(ValueError, match=msg): df['newcol'] = s # GH 4107, more descriptive error message df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=['a', 'b', 'c', 'd']) msg = 'incompatible index of inserted column with frame index' with pytest.raises(TypeError, match=msg): df['gr'] = df.groupby(['b', 'c']).count() def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 df = DataFrame(index=lrange(N)) new_col = np.random.randn(N) for i in range(K): df[i] = new_col expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=lrange(N)) assert_frame_equal(df, expected) def test_insert(self): df = DataFrame(np.random.randn(5, 3), index=np.arange(5), columns=['c', 'b', 'a']) df.insert(0, 'foo', df['a']) tm.assert_index_equal(df.columns, Index(['foo', 'c', 'b', 'a'])) tm.assert_series_equal(df['a'], df['foo'], check_names=False) df.insert(2, 'bar', df['c']) tm.assert_index_equal(df.columns, Index(['foo', 'c', 'bar', 'b', 'a'])) tm.assert_almost_equal(df['c'], df['bar'], check_names=False) # diff dtype # new item df['x'] = df['a'].astype('float32') result = Series(dict(float32=1, float64=5)) assert (df.get_dtype_counts().sort_index() == result).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float32=2, float64=4)) assert (df.get_dtype_counts().sort_index() == result).all() df['y'] = df['a'].astype('int32') result = Series(dict(float32=2, float64=4, int32=1)) assert (df.get_dtype_counts().sort_index() == result).all() with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) pytest.raises(ValueError, df.insert, 1, 'c', df['b']) df.columns.name = 'some_name' # preserve columns name field df.insert(0, 'baz', df['c']) assert df.columns.name == 'some_name' # GH 13522 df = DataFrame(index=['A', 'B', 'C']) df['X'] = df.index df['X'] = ['x', 'y', 'z'] exp = DataFrame(data={'X': ['x', 'y', 'z']}, index=['A', 'B', 'C']) assert_frame_equal(df, exp) def test_delitem(self): del self.frame['A'] assert 'A' not in self.frame def test_delitem_multiindex(self): midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) df = DataFrame(np.random.randn(4, 4), columns=midx) assert len(df.columns) == 4 assert ('A', ) in df.columns assert 'A' in df.columns result = df['A'] assert isinstance(result, DataFrame) del df['A'] assert len(df.columns) == 2 # A still in the levels, BUT get a KeyError if trying # to delete assert ('A', ) not in df.columns with pytest.raises(KeyError): del df[('A',)] # behavior of dropped/deleted MultiIndex levels changed from # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' # levels which are dropped/deleted assert 'A' not in df.columns with pytest.raises(KeyError): del df['A'] def test_pop(self): self.frame.columns.name = 'baz' self.frame.pop('A') assert 'A' not in self.frame self.frame['foo'] = 'bar' self.frame.pop('foo') assert 'foo' not in self.frame assert self.frame.columns.name == 'baz' # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ 'A', 'B', 'C'], index=['X', 'Y']) b = a.pop('B') b += 1 # original frame expected = DataFrame([[1, 3], [4, 6]], columns=[ 'A', 'C'], index=['X', 'Y']) tm.assert_frame_equal(a, expected) # result expected = Series([2, 5], index=['X', 'Y'], name='B') + 1 tm.assert_series_equal(b, expected) def test_pop_non_unique_cols(self): df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) df.columns = ["a", "b", "a"] res = df.pop("a") assert type(res) == DataFrame assert len(res) == 2 assert len(df.columns) == 1 assert "b" in df.columns assert "a" not in df.columns assert len(df.index) == 2 def test_insert_column_bug_4032(self): # GH4032, inserting a column and renaming causing errors df = DataFrame({'b': [1.1, 2.2]}) df = df.rename(columns={}) df.insert(0, 'a', [1, 2]) result = df.rename(columns={}) str(result) expected = DataFrame([[1, 1.1], [2, 2.2]], columns=['a', 'b']) assert_frame_equal(result, expected) df.insert(0, 'c', [1.3, 2.3]) result = df.rename(columns={}) str(result) expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=['c', 'a', 'b']) assert_frame_equal(result, expected)