@@ -801,7 +801,6 @@ cdef class TextReader:
801
801
raise StopIteration
802
802
self ._end_clock(' Tokenization' )
803
803
804
-
805
804
self ._start_clock()
806
805
columns = self ._convert_column_data(rows = rows,
807
806
footer = footer,
@@ -840,11 +839,12 @@ cdef class TextReader:
840
839
841
840
def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
842
841
cdef:
843
- Py_ssize_t i, nused, ncols
842
+ Py_ssize_t i, nused
844
843
kh_str_t * na_hashset = NULL
845
844
int start, end
846
845
object name, na_flist
847
846
bint na_filter = 0
847
+ Py_ssize_t num_cols
848
848
849
849
start = self .parser_start
850
850
@@ -857,6 +857,22 @@ cdef class TextReader:
857
857
# if footer > 0:
858
858
# end -= footer
859
859
860
+ # print >> sys.stderr, self.table_width
861
+ # print >> sys.stderr, self.leading_cols
862
+ # print >> sys.stderr, self.parser.lines
863
+ # print >> sys.stderr, start
864
+ # print >> sys.stderr, end
865
+ # print >> sys.stderr, self.header
866
+ # print >> sys.stderr, "index"
867
+ num_cols = - 1
868
+ for i in range (self .parser.lines):
869
+ num_cols = (num_cols < self .parser.line_fields[i]) * self .parser.line_fields[i] + \
870
+ (num_cols >= self .parser.line_fields[i]) * num_cols
871
+
872
+ if self .table_width - self .leading_cols > num_cols:
873
+ raise CParserError(" Too many columns specified: expected %s and found %s " %
874
+ (self .table_width - self .leading_cols, num_cols))
875
+
860
876
results = {}
861
877
nused = 0
862
878
for i in range (self .table_width):
@@ -1446,7 +1462,6 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1446
1462
if na_filter:
1447
1463
for i in range (lines):
1448
1464
word = COLITER_NEXT(it)
1449
-
1450
1465
k = kh_get_str(na_hashset, word)
1451
1466
# in the hash table
1452
1467
if k != na_hashset.n_buckets:
@@ -1828,16 +1843,6 @@ cdef _apply_converter(object f, parser_t *parser, int col,
1828
1843
1829
1844
return lib.maybe_convert_objects(result)
1830
1845
1831
- # if issubclass(values.dtype.type, (np.number, np.bool_)):
1832
- # return values
1833
-
1834
- # # XXX
1835
- # na_values = set([''])
1836
- # try:
1837
- # return lib.maybe_convert_numeric(values, na_values, False)
1838
- # except Exception:
1839
- # na_count = lib.sanitize_objects(values, na_values, False)
1840
- # return result
1841
1846
1842
1847
def _to_structured_array (dict columns , object names ):
1843
1848
cdef:
0 commit comments