@@ -121,30 +121,30 @@ cdef extern from "parser/tokenizer.h":
121121 io_callback cb_io
122122 io_cleanup cb_cleanup
123123
124- size_t chunksize # Number of bytes to prepare for each chunk
125- char * data # pointer to data to be processed
126- size_t datalen # amount of data available
127- size_t datapos
124+ int64_t chunksize # Number of bytes to prepare for each chunk
125+ char * data # pointer to data to be processed
126+ int64_t datalen # amount of data available
127+ int64_t datapos
128128
129129 # where to write out tokenized data
130130 char * stream
131- size_t stream_len
132- size_t stream_cap
131+ int64_t stream_len
132+ int64_t stream_cap
133133
134134 # Store words in (potentially ragged) matrix for now, hmm
135135 char ** words
136- size_t * word_starts # where we are in the stream
137- size_t words_len
138- size_t words_cap
136+ int64_t * word_starts # where we are in the stream
137+ int64_t words_len
138+ int64_t words_cap
139139
140- char * pword_start # pointer to stream start of current field
141- size_t word_start # position start of current field
140+ char * pword_start # pointer to stream start of current field
141+ int64_t word_start # position start of current field
142142
143- size_t * line_start # position in words for start of line
144- size_t * line_fields # Number of fields in each line
145- size_t lines # Number of lines observed
146- size_t file_lines # Number of file lines observed (with bad/skipped)
147- size_t lines_cap # Vector capacity
143+ int64_t * line_start # position in words for start of line
144+ int64_t * line_fields # Number of fields in each line
145+ int64_t lines # Number of lines observed
146+ int64_t file_lines # Number of file lines observed (with bad/skipped)
147+ int64_t lines_cap # Vector capacity
148148
149149 # Tokenizing stuff
150150 ParserState state
@@ -178,13 +178,13 @@ cdef extern from "parser/tokenizer.h":
178178 char thousands
179179
180180 int header # Boolean: 1: has header, 0: no header
181- ssize_t header_start # header row start
182- ssize_t header_end # header row end
181+ int64_t header_start # header row start
182+ int64_t header_end # header row end
183183
184184 void * skipset
185185 PyObject * skipfunc
186186 int64_t skip_first_N_rows
187- size_t skipfooter
187+ int64_t skipfooter
188188 # pick one, depending on whether the converter requires GIL
189189 double (* double_converter_nogil)(const char * , char ** ,
190190 char , char , char , int ) nogil
@@ -195,12 +195,12 @@ cdef extern from "parser/tokenizer.h":
195195 char * warn_msg
196196 char * error_msg
197197
198- size_t skip_empty_lines
198+ int64_t skip_empty_lines
199199
200200 ctypedef struct coliter_t:
201201 char ** words
202- size_t * line_start
203- size_t col
202+ int64_t * line_start
203+ int64_t col
204204
205205 ctypedef struct uint_state:
206206 int seen_sint
@@ -210,7 +210,7 @@ cdef extern from "parser/tokenizer.h":
210210 void uint_state_init(uint_state * self )
211211 int uint64_conflict(uint_state * self )
212212
213- void coliter_setup(coliter_t * it, parser_t * parser, size_t i, size_t start) nogil
213+ void coliter_setup(coliter_t * it, parser_t * parser, int64_t i, int64_t start) nogil
214214 void COLITER_NEXT(coliter_t, const char * ) nogil
215215
216216 parser_t* parser_new()
@@ -289,14 +289,14 @@ cdef class TextReader:
289289 object true_values, false_values
290290 object handle
291291 bint na_filter, verbose, has_usecols, has_mi_columns
292- size_t parser_start
292+ int64_t parser_start
293293 list clocks
294294 char * c_encoding
295295 kh_str_t * false_set
296296 kh_str_t * true_set
297297
298298 cdef public:
299- size_t leading_cols, table_width, skipfooter, buffer_lines
299+ int64_t leading_cols, table_width, skipfooter, buffer_lines
300300 object allow_leading_cols
301301 object delimiter, converters, delim_whitespace
302302 object na_values
@@ -731,7 +731,7 @@ cdef class TextReader:
731731 char * word
732732 object name
733733 int status
734- size_t hr, data_line
734+ int64_t hr, data_line
735735 char * errors = " strict"
736736 cdef StringPath path = _string_path(self .c_encoding)
737737
@@ -950,8 +950,8 @@ cdef class TextReader:
950950
951951 cdef _read_rows(self , rows, bint trim):
952952 cdef:
953- size_t buffered_lines
954- size_t irows, footer = 0
953+ int64_t buffered_lines
954+ int64_t irows, footer = 0
955955
956956 self ._start_clock()
957957
@@ -1019,13 +1019,13 @@ cdef class TextReader:
10191019
10201020 def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
10211021 cdef:
1022- size_t i
1022+ int64_t i
10231023 int nused
10241024 kh_str_t * na_hashset = NULL
1025- size_t start, end
1025+ int64_t start, end
10261026 object name, na_flist, col_dtype = None
10271027 bint na_filter = 0
1028- size_t num_cols
1028+ int64_t num_cols
10291029
10301030 start = self .parser_start
10311031
@@ -1038,7 +1038,7 @@ cdef class TextReader:
10381038 # if footer > 0:
10391039 # end -= footer
10401040
1041- num_cols = 0
1041+ num_cols = - 1
10421042 for i in range (self .parser.lines):
10431043 num_cols = (num_cols < self .parser.line_fields[i]) * \
10441044 self .parser.line_fields[i] + \
@@ -1197,7 +1197,7 @@ cdef class TextReader:
11971197 return col_res, na_count
11981198
11991199 cdef _convert_with_dtype(self , object dtype, Py_ssize_t i,
1200- size_t start, size_t end,
1200+ int64_t start, int64_t end,
12011201 bint na_filter,
12021202 bint user_dtype,
12031203 kh_str_t * na_hashset,
@@ -1277,7 +1277,7 @@ cdef class TextReader:
12771277 raise TypeError (" the dtype %s is not "
12781278 " supported for parsing" % dtype)
12791279
1280- cdef _string_convert(self , Py_ssize_t i, size_t start, size_t end,
1280+ cdef _string_convert(self , Py_ssize_t i, int64_t start, int64_t end,
12811281 bint na_filter, kh_str_t * na_hashset):
12821282
12831283 cdef StringPath path = _string_path(self .c_encoding)
@@ -1338,7 +1338,7 @@ cdef class TextReader:
13381338 kh_destroy_str(table)
13391339
13401340 cdef _get_column_name(self , Py_ssize_t i, Py_ssize_t nused):
1341- cdef int j
1341+ cdef int64_t j
13421342 if self .has_usecols and self .names is not None :
13431343 if (not callable (self .usecols) and
13441344 len (self .names) == len (self .usecols)):
@@ -1430,8 +1430,8 @@ cdef inline StringPath _string_path(char *encoding):
14301430# ----------------------------------------------------------------------
14311431# Type conversions / inference support code
14321432
1433- cdef _string_box_factorize(parser_t * parser, size_t col,
1434- size_t line_start, size_t line_end,
1433+ cdef _string_box_factorize(parser_t * parser, int64_t col,
1434+ int64_t line_start, int64_t line_end,
14351435 bint na_filter, kh_str_t * na_hashset):
14361436 cdef:
14371437 int error, na_count = 0
@@ -1483,8 +1483,8 @@ cdef _string_box_factorize(parser_t *parser, size_t col,
14831483
14841484 return result, na_count
14851485
1486- cdef _string_box_utf8(parser_t * parser, size_t col,
1487- size_t line_start, size_t line_end,
1486+ cdef _string_box_utf8(parser_t * parser, int64_t col,
1487+ int64_t line_start, int64_t line_end,
14881488 bint na_filter, kh_str_t * na_hashset):
14891489 cdef:
14901490 int error, na_count = 0
@@ -1536,8 +1536,8 @@ cdef _string_box_utf8(parser_t *parser, size_t col,
15361536
15371537 return result, na_count
15381538
1539- cdef _string_box_decode(parser_t * parser, size_t col,
1540- size_t line_start, size_t line_end,
1539+ cdef _string_box_decode(parser_t * parser, int64_t col,
1540+ int64_t line_start, int64_t line_end,
15411541 bint na_filter, kh_str_t * na_hashset,
15421542 char * encoding):
15431543 cdef:
@@ -1595,8 +1595,8 @@ cdef _string_box_decode(parser_t *parser, size_t col,
15951595
15961596
15971597@ cython.boundscheck (False )
1598- cdef _categorical_convert(parser_t * parser, size_t col,
1599- size_t line_start, size_t line_end,
1598+ cdef _categorical_convert(parser_t * parser, int64_t col,
1599+ int64_t line_start, int64_t line_end,
16001600 bint na_filter, kh_str_t * na_hashset,
16011601 char * encoding):
16021602 " Convert column data into codes, categories"
@@ -1666,8 +1666,8 @@ cdef _categorical_convert(parser_t *parser, size_t col,
16661666 kh_destroy_str(table)
16671667 return np.asarray(codes), result, na_count
16681668
1669- cdef _to_fw_string(parser_t * parser, size_t col, size_t line_start,
1670- size_t line_end, size_t width):
1669+ cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
1670+ int64_t line_end, int64_t width):
16711671 cdef:
16721672 Py_ssize_t i
16731673 coliter_t it
@@ -1683,11 +1683,11 @@ cdef _to_fw_string(parser_t *parser, size_t col, size_t line_start,
16831683
16841684 return result
16851685
1686- cdef inline void _to_fw_string_nogil(parser_t * parser, size_t col,
1687- size_t line_start, size_t line_end,
1686+ cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
1687+ int64_t line_start, int64_t line_end,
16881688 size_t width, char * data) nogil:
16891689 cdef:
1690- size_t i
1690+ int64_t i
16911691 coliter_t it
16921692 const char * word = NULL
16931693
@@ -1702,7 +1702,7 @@ cdef char* cinf = b'inf'
17021702cdef char * cposinf = b' +inf'
17031703cdef char * cneginf = b' -inf'
17041704
1705- cdef _try_double(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1705+ cdef _try_double(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
17061706 bint na_filter, kh_str_t * na_hashset, object na_flist):
17071707 cdef:
17081708 int error, na_count = 0
@@ -1811,7 +1811,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
18111811
18121812 return 0
18131813
1814- cdef _try_uint64(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1814+ cdef _try_uint64(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
18151815 bint na_filter, kh_str_t * na_hashset):
18161816 cdef:
18171817 int error
@@ -1845,8 +1845,8 @@ cdef _try_uint64(parser_t *parser, size_t col, size_t line_start, size_t line_en
18451845
18461846 return result
18471847
1848- cdef inline int _try_uint64_nogil(parser_t * parser, size_t col, size_t line_start,
1849- size_t line_end, bint na_filter,
1848+ cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col, int64_t line_start,
1849+ int64_t line_end, bint na_filter,
18501850 const kh_str_t * na_hashset,
18511851 uint64_t * data, uint_state * state) nogil:
18521852 cdef:
@@ -1882,7 +1882,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, size_t col, size_t line_star
18821882
18831883 return 0
18841884
1885- cdef _try_int64(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1885+ cdef _try_int64(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
18861886 bint na_filter, kh_str_t * na_hashset):
18871887 cdef:
18881888 int error, na_count = 0
@@ -1909,8 +1909,8 @@ cdef _try_int64(parser_t *parser, size_t col, size_t line_start, size_t line_end
19091909
19101910 return result, na_count
19111911
1912- cdef inline int _try_int64_nogil(parser_t * parser, size_t col, size_t line_start,
1913- size_t line_end, bint na_filter,
1912+ cdef inline int _try_int64_nogil(parser_t * parser, int64_t col, int64_t line_start,
1913+ int64_t line_end, bint na_filter,
19141914 const kh_str_t * na_hashset, int64_t NA,
19151915 int64_t * data, int * na_count) nogil:
19161916 cdef:
@@ -1947,7 +1947,7 @@ cdef inline int _try_int64_nogil(parser_t *parser, size_t col, size_t line_start
19471947
19481948 return 0
19491949
1950- cdef _try_bool(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1950+ cdef _try_bool(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
19511951 bint na_filter, kh_str_t * na_hashset):
19521952 cdef:
19531953 int na_count
@@ -1969,8 +1969,8 @@ cdef _try_bool(parser_t *parser, size_t col, size_t line_start, size_t line_end,
19691969 return None , None
19701970 return result.view(np.bool_), na_count
19711971
1972- cdef inline int _try_bool_nogil(parser_t * parser, size_t col, size_t line_start,
1973- size_t line_end, bint na_filter,
1972+ cdef inline int _try_bool_nogil(parser_t * parser, int64_t col, int64_t line_start,
1973+ int64_t line_end, bint na_filter,
19741974 const kh_str_t * na_hashset, uint8_t NA,
19751975 uint8_t * data, int * na_count) nogil:
19761976 cdef:
@@ -2009,7 +2009,7 @@ cdef inline int _try_bool_nogil(parser_t *parser, size_t col, size_t line_start,
20092009 data += 1
20102010 return 0
20112011
2012- cdef _try_bool_flex(parser_t * parser, size_t col, size_t line_start, size_t line_end,
2012+ cdef _try_bool_flex(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
20132013 bint na_filter, const kh_str_t * na_hashset,
20142014 const kh_str_t * true_hashset,
20152015 const kh_str_t * false_hashset):
@@ -2035,8 +2035,8 @@ cdef _try_bool_flex(parser_t *parser, size_t col, size_t line_start, size_t line
20352035 return None , None
20362036 return result.view(np.bool_), na_count
20372037
2038- cdef inline int _try_bool_flex_nogil(parser_t * parser, size_t col, size_t line_start,
2039- size_t line_end, bint na_filter,
2038+ cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col, int64_t line_start,
2039+ int64_t line_end, bint na_filter,
20402040 const kh_str_t * na_hashset,
20412041 const kh_str_t * true_hashset,
20422042 const kh_str_t * false_hashset,
@@ -2254,8 +2254,8 @@ for k in list(na_values):
22542254 na_values[np.dtype(k)] = na_values[k]
22552255
22562256
2257- cdef _apply_converter(object f, parser_t * parser, size_t col,
2258- size_t line_start, size_t line_end,
2257+ cdef _apply_converter(object f, parser_t * parser, int64_t col,
2258+ int64_t line_start, int64_t line_end,
22592259 char * c_encoding):
22602260 cdef:
22612261 int error
@@ -2299,7 +2299,7 @@ def _to_structured_array(dict columns, object names, object usecols):
22992299
23002300 object name, fnames, field_type
23012301 Py_ssize_t i, offset, nfields, length
2302- size_t stride, elsize
2302+ int64_t stride, elsize
23032303 char * buf
23042304
23052305 if names is None :
@@ -2347,10 +2347,10 @@ def _to_structured_array(dict columns, object names, object usecols):
23472347
23482348 return recs
23492349
2350- cdef _fill_structured_column(char * dst, char * src, size_t elsize,
2351- size_t stride, size_t length, bint incref):
2350+ cdef _fill_structured_column(char * dst, char * src, int64_t elsize,
2351+ int64_t stride, int64_t length, bint incref):
23522352 cdef:
2353- size_t i
2353+ int64_t i
23542354
23552355 if incref:
23562356 util.transfer_object_column(dst, src, stride, length)
0 commit comments