Skip to content

Commit 87a391e

Browse files
author
y-p
committed
TST: test the hell out of the new df.to_csv()
1 parent 616347c commit 87a391e

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

pandas/tests/test_frame.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4450,6 +4450,115 @@ def test_to_csv_from_csv(self):
44504450

44514451
os.remove(path)
44524452

4453+
def test_to_csv_moar(self):
    """Round-trip many frame shapes through ``to_csv`` / ``from_csv``.

    Covers: row counts around multiples of ``N`` and around
    ``100000 // ncols``, duplicate row labels, every combination of
    index/column label kinds ('i', 'f', 's', 'u', 'dt'), an empty frame
    with only an index, and 2-level row and/or column MultiIndexes.
    """
    import re

    from pandas.util.testing import makeCustomDataframe as mkdf

    path = '__tmp_to_csv_dupe_cols__'

    # Label kind -> numpy dtype used to normalise labels before comparing.
    type_map = dict(i='i', f='f', s='O', u='O', dt='O')

    # Same pattern as before, written as a raw string so the backslash
    # sequences are not treated as (invalid) string escapes.
    strip_chars = re.compile(r"""['"\s\(\)]""")

    def _to_uni(x):
        # Decode byte strings so both frames carry unicode labels.
        if not isinstance(x, unicode):
            return x.decode('utf8')
        return x

    def _stuple_to_tuple(x):
        # Column MultiIndex labels come back as the string form of a
        # tuple, e.g. "('a', 'b')"; split it and strip the punctuation.
        return tuple(strip_chars.sub("", part) for part in x.split(","))

    def _coerce_labels(labels, kind):
        # Build the label array for the given kind; used identically for
        # the index and the columns of both frames.
        if kind == 'u':
            return np.array([_to_uni(x) for x in labels], dtype='O')
        if kind == 'dt':
            return np.array([Timestamp(x) for x in labels], dtype='O')
        return np.array(labels, dtype=type_map.get(kind))

    def _do_test(df, path, r_dtype=None, c_dtype=None,
                 rnlvl=None, cnlvl=None):
        # Write and read back; always clean up the temp file, and never
        # let a failing cleanup hide the original exception.
        try:
            df.to_csv(path, encoding='utf8')
            recons = DataFrame.from_csv(path)
        finally:
            if os.path.exists(path):
                os.remove(path)

        if rnlvl:
            # from_csv only restores the first index level; the remaining
            # levels come back as the leading data columns.
            delta_lvl = [recons.icol(i).values for i in range(rnlvl - 1)]
            ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl)
            recons.index = ix
            recons = recons.iloc[:, rnlvl - 1:]

        if cnlvl:
            recons.columns = MultiIndex.from_tuples(
                [_stuple_to_tuple(c) for c in recons.columns])

        if r_dtype:
            recons.index = _coerce_labels(recons.index, r_dtype)
            df.index = _coerce_labels(df.index, r_dtype)
        if c_dtype:
            recons.columns = _coerce_labels(recons.columns, c_dtype)
            df.columns = _coerce_labels(df.columns, c_dtype)

        assert_frame_equal(df, recons, check_names=False)

    N = 100

    def _row_counts(base):
        # Row counts straddling N, 2*N and ``base``.
        return [10, N - 2, N - 1, N, N + 1, N + 2,
                2 * N - 2, 2 * N - 1, 2 * N, 2 * N + 1, 2 * N + 2,
                base - 1, base, base + 1]

    for ncols in [1, 10, 30]:
        base = max(100000 // ncols, 1)
        for nrows in _row_counts(base):
            print(nrows, ncols)
            _do_test(mkdf(nrows, ncols), path)

    for nrows in [10, N - 2, N - 1, N, N + 1, N + 2]:
        df = mkdf(nrows, 10)
        # NOTE(review): the original also built a list of duplicated
        # column labels here but never assigned it to the frame (and it
        # had the wrong length), so only duplicate row labels are tested.
        ix = list(df.index)
        ix[:2] = ["rdupe", "rdupe"]
        ix[-2:] = ["rdupe", "rdupe"]
        print(nrows)

        df.index = ix
        _do_test(df, path)

    for r_idx_type in ['i', 'f', 's', 'u', 'dt']:
        for c_idx_type in ['i', 'f', 's', 'u', 'dt']:
            print(r_idx_type, c_idx_type)
            _do_test(mkdf(100, 1, r_idx_type=r_idx_type,
                          c_idx_type=c_idx_type),
                     path, r_idx_type, c_idx_type)
            _do_test(mkdf(100, 2, r_idx_type=r_idx_type,
                          c_idx_type=c_idx_type),
                     path, r_idx_type, c_idx_type)

    _do_test(DataFrame(index=range(10)), path)
    _do_test(mkdf(50001, 2, r_idx_nlevels=2), path, rnlvl=2)
    for ncols in [2, 10, 30]:
        base = 100000 // ncols
        for nrows in _row_counts(base):
            print(nrows, ncols)
            _do_test(mkdf(nrows, ncols, r_idx_nlevels=2), path, rnlvl=2)
            _do_test(mkdf(nrows, ncols, c_idx_nlevels=2), path, cnlvl=2)
            _do_test(mkdf(nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2),
                     path, rnlvl=2, cnlvl=2)
4559+
4560+
4561+
44534562
def test_to_csv_from_csv_w_some_infs(self):
44544563
path = '__%s__' % tm.rands(10)
44554564

0 commit comments

Comments
 (0)