46
46
converters : dict, optional
47
47
Dict of functions for converting values in certain columns. Keys can either
48
48
be integers or column labels
49
+ verbose : boolean, default False
50
+ Indicate number of NA values placed in non-numeric columns
49
51
50
52
Returns
51
53
-------
93
95
def read_csv (filepath_or_buffer , sep = None , header = 0 , index_col = None , names = None ,
94
96
skiprows = None , na_values = None , parse_dates = False ,
95
97
date_parser = None , nrows = None , iterator = False , chunksize = None ,
96
- skip_footer = 0 , converters = None ):
98
+ skip_footer = 0 , converters = None , verbose = False ):
97
99
if hasattr (filepath_or_buffer , 'read' ):
98
100
f = filepath_or_buffer
99
101
else :
@@ -114,7 +116,8 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
114
116
delimiter = sep ,
115
117
chunksize = chunksize ,
116
118
skip_footer = skip_footer ,
117
- converters = converters )
119
+ converters = converters ,
120
+ verbose = verbose )
118
121
119
122
if nrows is not None :
120
123
return parser .get_chunk (nrows )
@@ -127,13 +130,14 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
127
130
def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
               names=None, skiprows=None, na_values=None, parse_dates=False,
               date_parser=None, nrows=None, iterator=False, chunksize=None,
               skip_footer=0, converters=None, verbose=False):
    # Thin wrapper around read_csv: the signature matches read_csv except
    # that the separator defaults to a tab. Every argument is forwarded
    # unchanged and read_csv's return value is handed back as-is.
    options = dict(
        sep=sep,
        header=header,
        index_col=index_col,
        names=names,
        skiprows=skiprows,
        na_values=na_values,
        parse_dates=parse_dates,
        date_parser=date_parser,
        nrows=nrows,
        iterator=iterator,
        chunksize=chunksize,
        skip_footer=skip_footer,
        converters=converters,
        verbose=verbose,
    )
    return read_csv(filepath_or_buffer, **options)
137
141
138
142
def read_clipboard (** kwargs ): # pragma: no cover
139
143
"""
@@ -196,7 +200,7 @@ class TextParser(object):
196
200
def __init__ (self , f , delimiter = None , names = None , header = 0 ,
197
201
index_col = None , na_values = None , parse_dates = False ,
198
202
date_parser = None , chunksize = None , skiprows = None ,
199
- skip_footer = 0 , converters = None ):
203
+ skip_footer = 0 , converters = None , verbose = False ):
200
204
"""
201
205
Workhorse function for processing nested list into DataFrame
202
206
@@ -215,6 +219,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
215
219
self .skiprows = set () if skiprows is None else set (skiprows )
216
220
self .skip_footer = skip_footer
217
221
self .delimiter = delimiter
222
+ self .verbose = verbose
218
223
219
224
if converters is not None :
220
225
assert (isinstance (converters , dict ))
@@ -412,14 +417,17 @@ def get_chunk(self, rows=None):
412
417
if np .isscalar (self .index_col ):
413
418
if self .parse_dates :
414
419
index = lib .try_parse_dates (index , parser = self .date_parser )
415
- index = Index (_convert_types (index , self .na_values ),
416
- name = self .index_name )
420
+ index , na_count = _convert_types (index , self .na_values )
421
+ index = Index (index , name = self .index_name )
422
+ if self .verbose and na_count :
423
+ print 'Found %d NA values in the index' % na_count
417
424
else :
418
425
arrays = []
419
426
for arr in index :
420
427
if self .parse_dates :
421
428
arr = lib .try_parse_dates (arr , parser = self .date_parser )
422
- arrays .append (_convert_types (arr , self .na_values ))
429
+ arr , _ = _convert_types (arr , self .na_values )
430
+ arrays .append (arr )
423
431
index = MultiIndex .from_arrays (arrays , names = self .index_name )
424
432
else :
425
433
index = Index (np .arange (len (content )))
@@ -442,7 +450,7 @@ def get_chunk(self, rows=None):
442
450
result = result .astype ('O' )
443
451
data [col ] = result
444
452
445
- data = _convert_to_ndarrays (data , self .na_values )
453
+ data = _convert_to_ndarrays (data , self .na_values , self . verbose )
446
454
447
455
return DataFrame (data = data , columns = self .columns , index = index )
448
456
@@ -483,24 +491,30 @@ def _get_lines(self, rows=None):
483
491
484
492
return lines
485
493
486
def _convert_to_ndarrays(dct, na_values, verbose=False):
    """
    Run _convert_types over every column held in a dict.

    Parameters
    ----------
    dct : dict
        Maps a column key to the array of values for that column.
    na_values : forwarded unchanged to _convert_types
    verbose : boolean, default False
        If True, print one line for each column where _convert_types
        reported a nonzero NA count.

    Returns
    -------
    converted : dict
        Same keys as dct, each mapped to the converted values.
    """
    converted = {}
    for col, raw in dct.iteritems():
        converted[col], n_missing = _convert_types(raw, na_values)
        if not (verbose and n_missing):
            continue
        print('Filled %d NA values in column %s' % (n_missing, str(col)))
    return converted
491
502
492
503
def _convert_types(values, na_values):
    """
    Try to coerce an array of parsed values to a more specific dtype.

    Parameters
    ----------
    values : ndarray
    na_values : collection of NA markers, forwarded to the lib helpers

    Returns
    -------
    result : ndarray
        `values` itself when it is already numeric/boolean, otherwise the
        outcome of the numeric / object / boolean conversion attempts.
    na_count : int
        The count returned by lib.sanitize_objects when numeric conversion
        raised; 0 in every other case.
    """
    na_count = 0

    # Numeric and boolean arrays are returned untouched.
    if issubclass(values.dtype.type, (np.number, np.bool_)):
        return values, na_count

    try:
        result = lib.maybe_convert_numeric(values, na_values)
    except Exception:
        # Numeric conversion failed; fall back to the object path.
        # NOTE(review): sanitize_objects presumably rewrites NA markers in
        # `values` in place (only its count is used here) -- confirm.
        na_count = lib.sanitize_objects(values, na_values)
        result = values

    if result.dtype == np.object_:
        # Operate on `result`, not the raw input, so that whatever the
        # numeric step produced is what gets the boolean conversion.
        result = lib.maybe_convert_bool(result)

    return result, na_count
504
518
505
519
#-------------------------------------------------------------------------------
506
520
# ExcelFile class
0 commit comments