@@ -4450,6 +4450,115 @@ def test_to_csv_from_csv(self):
4450
4450
4451
4451
os .remove (path )
4452
4452
4453
def test_to_csv_moar(self):
    """Round-trip many frame shapes and index types through a CSV file.

    Each frame is written with ``to_csv`` and read back with
    ``DataFrame.from_csv``; the reconstructed frame is then normalised
    (index levels, column tuples, label dtypes) and compared with the
    original.  Row counts are chosen around N, 2 * N and 100000 / ncols.
    """
    import re

    from pandas.util.testing import makeCustomDataframe as mkdf

    path = '__tmp_to_csv_dupe_cols__'

    # Quotes, whitespace and parentheses are stripped from every piece of
    # a stringified column tuple read back from the CSV header.
    tuple_noise = re.compile(r"""['"\s()]""")

    # Index type codes passed to mkdf -> dtype used when comparing labels.
    type_map = dict(i='i', f='f', s='O', u='O', dt='O')

    def _to_uni(x):
        """Return ``x`` as unicode, decoding byte strings as UTF-8."""
        if not isinstance(x, unicode):
            return x.decode('utf8')
        return x

    def _stuple_to_tuple(x):
        """Turn a stringified tuple such as "('a', 'b')" into ('a', 'b')."""
        return tuple(tuple_noise.sub("", part) for part in x.split(","))

    def _coerce_labels(labels, idx_type):
        """Return ``labels`` as an ndarray comparable for ``idx_type``."""
        if idx_type == 'u':  # unicode
            return np.array([_to_uni(x) for x in labels], dtype='O')
        if idx_type == 'dt':  # datetime
            return np.array([Timestamp(x) for x in labels], dtype='O')
        return np.array(labels, dtype=type_map.get(idx_type))

    def _do_test(df, path, r_dtype=None, c_dtype=None,
                 rnlvl=None, cnlvl=None):
        """Write ``df`` to ``path``, read it back and compare.

        r_dtype / c_dtype : index type code of the rows / columns
            ('i', 'f', 's', 'u' or 'dt'), used to coerce both frames'
            labels to a common representation before comparing.
        rnlvl / cnlvl : number of row / column index levels to rebuild
            on the reconstructed frame.
        """
        try:
            df.to_csv(path, encoding='utf8')
            recons = DataFrame.from_csv(path)
        finally:
            # Always clean up, on success as well as on failure.  The
            # existence check keeps a failed write from being masked by
            # an error from os.remove.
            if os.path.exists(path):
                os.remove(path)

        if rnlvl:
            # The first level was read as the index; the remaining levels
            # were read as leading data columns and are moved back.
            delta_lvl = [recons.icol(i).values for i in range(rnlvl - 1)]
            ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl)
            recons.index = ix
            recons = recons.iloc[:, rnlvl - 1:]

        if cnlvl:
            cols = MultiIndex.from_tuples(
                [_stuple_to_tuple(c) for c in recons.columns])
            recons.columns = cols

        if r_dtype:
            recons.index = _coerce_labels(recons.index, r_dtype)
            df.index = _coerce_labels(df.index, r_dtype)
        if c_dtype:
            recons.columns = _coerce_labels(recons.columns, c_dtype)
            df.columns = _coerce_labels(df.columns, c_dtype)

        assert_frame_equal(df, recons, check_names=False)

    N = 100
    near_n = [10, N - 2, N - 1, N, N + 1, N + 2]
    near_2n = [2 * N - 2, 2 * N - 1, 2 * N, 2 * N + 1, 2 * N + 2]

    # Plain frames, with row counts around N, 2 * N and 100000 / ncols.
    for ncols in [1, 10, 30]:
        base = 100000 // ncols
        for nrows in near_n + near_2n + [base - 1, base, base + 1]:
            _do_test(mkdf(nrows, ncols), path)

    # Duplicate row labels at both ends of the index.
    # NOTE(review): despite the file name, duplicate *column* labels are
    # not exercised; the original built such a list but never assigned it.
    for nrows in near_n:
        df = mkdf(nrows, 10)
        ix = list(df.index)
        ix[:2] = ["rdupe", "rdupe"]
        ix[-2:] = ["rdupe", "rdupe"]
        df.index = ix
        _do_test(df, path)

    # Every combination of row / column index type, for 1 and 2 columns.
    for r_idx_type in ['i', 'f', 's', 'u', 'dt']:
        for c_idx_type in ['i', 'f', 's', 'u', 'dt']:
            _do_test(mkdf(100, 1, r_idx_type=r_idx_type,
                          c_idx_type=c_idx_type),
                     path, r_idx_type, c_idx_type)
            _do_test(mkdf(100, 2, r_idx_type=r_idx_type,
                          c_idx_type=c_idx_type),
                     path, r_idx_type, c_idx_type)

    # A frame with an index but no columns, then a large 2-level row index.
    _do_test(DataFrame(index=range(10)), path)
    _do_test(mkdf(50001, 2, r_idx_nlevels=2), path, rnlvl=2)

    # Multi-level row and/or column indexes over the same row counts.
    for ncols in [2, 10, 30]:
        base = 100000 // ncols
        for nrows in near_n + near_2n + [base - 1, base, base + 1]:
            _do_test(mkdf(nrows, ncols, r_idx_nlevels=2), path, rnlvl=2)
            _do_test(mkdf(nrows, ncols, c_idx_nlevels=2), path, cnlvl=2)
            _do_test(mkdf(nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2),
                     path, rnlvl=2, cnlvl=2)
4559
+
4560
+
4561
+
4453
4562
def test_to_csv_from_csv_w_some_infs (self ):
4454
4563
path = '__%s__' % tm .rands (10 )
4455
4564
0 commit comments