@@ -121,30 +121,30 @@ cdef extern from "parser/tokenizer.h":
121121 io_callback cb_io
122122 io_cleanup cb_cleanup
123123
124- int chunksize # Number of bytes to prepare for each chunk
125- char * data # pointer to data to be processed
126- int datalen # amount of data available
127- int datapos
124+ int64_t chunksize # Number of bytes to prepare for each chunk
125+ char * data # pointer to data to be processed
126+ int64_t datalen # amount of data available
127+ int64_t datapos
128128
129129 # where to write out tokenized data
130130 char * stream
131- int stream_len
132- int stream_cap
131+ int64_t stream_len
132+ int64_t stream_cap
133133
134134 # Store words in (potentially ragged) matrix for now, hmm
135135 char ** words
136- int * word_starts # where we are in the stream
137- int words_len
138- int words_cap
136+ int64_t * word_starts # where we are in the stream
137+ int64_t words_len
138+ int64_t words_cap
139139
140- char * pword_start # pointer to stream start of current field
141- int word_start # position start of current field
140+ char * pword_start # pointer to stream start of current field
141+ int64_t word_start # position start of current field
142142
143- int * line_start # position in words for start of line
144- int * line_fields # Number of fields in each line
145- int lines # Number of lines observed
146- int file_lines # Number of file lines observed (with bad/skipped)
147- int lines_cap # Vector capacity
143+ int64_t * line_start # position in words for start of line
144+ int64_t * line_fields # Number of fields in each line
145+ int64_t lines # Number of lines observed
146+ int64_t file_lines # Number of file lines observed (with bad/skipped)
147+ int64_t lines_cap # Vector capacity
148148
149149 # Tokenizing stuff
150150 ParserState state
@@ -177,14 +177,14 @@ cdef extern from "parser/tokenizer.h":
177177 # thousands separator (comma, period)
178178 char thousands
179179
180- int header # Boolean: 1: has header, 0: no header
181- int header_start # header row start
182- int header_end # header row end
180+ int header # Boolean: 1: has header, 0: no header
181+ int64_t header_start # header row start
182+ int64_t header_end # header row end
183183
184184 void * skipset
185185 PyObject * skipfunc
186186 int64_t skip_first_N_rows
187- int skipfooter
187+ int64_t skipfooter
188188 # pick one, depending on whether the converter requires GIL
189189 double (* double_converter_nogil)(const char * , char ** ,
190190 char , char , char , int ) nogil
@@ -195,12 +195,12 @@ cdef extern from "parser/tokenizer.h":
195195 char * warn_msg
196196 char * error_msg
197197
198- int skip_empty_lines
198+ int64_t skip_empty_lines
199199
200200 ctypedef struct coliter_t:  # column-iterator state; passed to coliter_setup() and COLITER_NEXT(), both declared in this extern block
201201 char ** words
202- int * line_start
203- int col
202+ int64_t * line_start  # int64_t *, the same type as the line_start field declared earlier in this extern block
203+ int64_t col  # presumably the index of the column being iterated - TODO confirm against tokenizer.h
204204
205205 ctypedef struct uint_state:
206206 int seen_sint
@@ -210,7 +210,8 @@ cdef extern from "parser/tokenizer.h":
210210 void uint_state_init(uint_state * self )
211211 int uint64_conflict(uint_state * self )
212212
213- void coliter_setup(coliter_t * it, parser_t * parser, int i, int start) nogil
213+ void coliter_setup(coliter_t * it, parser_t * parser,
214+ int64_t i, int64_t start) nogil
214215 void COLITER_NEXT(coliter_t, const char * ) nogil
215216
216217 parser_t* parser_new()
@@ -289,14 +290,14 @@ cdef class TextReader:
289290 object true_values, false_values
290291 object handle
291292 bint na_filter, verbose, has_usecols, has_mi_columns
292- int parser_start
293+ int64_t parser_start
293294 list clocks
294295 char * c_encoding
295296 kh_str_t * false_set
296297 kh_str_t * true_set
297298
298299 cdef public:
299- int leading_cols, table_width, skipfooter, buffer_lines
300+ int64_t leading_cols, table_width, skipfooter, buffer_lines
300301 object allow_leading_cols
301302 object delimiter, converters, delim_whitespace
302303 object na_values
@@ -730,7 +731,8 @@ cdef class TextReader:
730731 Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
731732 char * word
732733 object name
733- int status, hr, data_line
734+ int status
735+ int64_t hr, data_line
734736 char * errors = " strict"
735737 cdef StringPath path = _string_path(self .c_encoding)
736738
@@ -949,8 +951,8 @@ cdef class TextReader:
949951
950952 cdef _read_rows(self , rows, bint trim):
951953 cdef:
952- int buffered_lines
953- int irows, footer = 0
954+ int64_t buffered_lines
955+ int64_t irows, footer = 0
954956
955957 self ._start_clock()
956958
@@ -1018,12 +1020,13 @@ cdef class TextReader:
10181020
10191021 def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
10201022 cdef:
1021- Py_ssize_t i, nused
1023+ int64_t i
1024+ int nused
10221025 kh_str_t * na_hashset = NULL
1023- int start, end
1026+ int64_t start, end
10241027 object name, na_flist, col_dtype = None
10251028 bint na_filter = 0
1026- Py_ssize_t num_cols
1029+ int64_t num_cols
10271030
10281031 start = self .parser_start
10291032
@@ -1195,7 +1198,7 @@ cdef class TextReader:
11951198 return col_res, na_count
11961199
11971200 cdef _convert_with_dtype(self , object dtype, Py_ssize_t i,
1198- int start, int end,
1201+ int64_t start, int64_t end,
11991202 bint na_filter,
12001203 bint user_dtype,
12011204 kh_str_t * na_hashset,
@@ -1275,7 +1278,7 @@ cdef class TextReader:
12751278 raise TypeError (" the dtype %s is not "
12761279 " supported for parsing" % dtype)
12771280
1278- cdef _string_convert(self , Py_ssize_t i, int start, int end,
1281+ cdef _string_convert(self , Py_ssize_t i, int64_t start, int64_t end,
12791282 bint na_filter, kh_str_t * na_hashset):
12801283
12811284 cdef StringPath path = _string_path(self .c_encoding)
@@ -1336,6 +1339,7 @@ cdef class TextReader:
13361339 kh_destroy_str(table)
13371340
13381341 cdef _get_column_name(self , Py_ssize_t i, Py_ssize_t nused):
1342+ cdef int64_t j
13391343 if self .has_usecols and self .names is not None :
13401344 if (not callable (self .usecols) and
13411345 len (self .names) == len (self .usecols)):
@@ -1427,8 +1431,8 @@ cdef inline StringPath _string_path(char *encoding):
14271431# ----------------------------------------------------------------------
14281432# Type conversions / inference support code
14291433
1430- cdef _string_box_factorize(parser_t * parser, int col,
1431- int line_start, int line_end,
1434+ cdef _string_box_factorize(parser_t * parser, int64_t col,
1435+ int64_t line_start, int64_t line_end,
14321436 bint na_filter, kh_str_t * na_hashset):
14331437 cdef:
14341438 int error, na_count = 0
@@ -1480,8 +1484,8 @@ cdef _string_box_factorize(parser_t *parser, int col,
14801484
14811485 return result, na_count
14821486
1483- cdef _string_box_utf8(parser_t * parser, int col,
1484- int line_start, int line_end,
1487+ cdef _string_box_utf8(parser_t * parser, int64_t col,
1488+ int64_t line_start, int64_t line_end,
14851489 bint na_filter, kh_str_t * na_hashset):
14861490 cdef:
14871491 int error, na_count = 0
@@ -1533,8 +1537,8 @@ cdef _string_box_utf8(parser_t *parser, int col,
15331537
15341538 return result, na_count
15351539
1536- cdef _string_box_decode(parser_t * parser, int col,
1537- int line_start, int line_end,
1540+ cdef _string_box_decode(parser_t * parser, int64_t col,
1541+ int64_t line_start, int64_t line_end,
15381542 bint na_filter, kh_str_t * na_hashset,
15391543 char * encoding):
15401544 cdef:
@@ -1592,8 +1596,8 @@ cdef _string_box_decode(parser_t *parser, int col,
15921596
15931597
15941598@ cython.boundscheck (False )
1595- cdef _categorical_convert(parser_t * parser, int col,
1596- int line_start, int line_end,
1599+ cdef _categorical_convert(parser_t * parser, int64_t col,
1600+ int64_t line_start, int64_t line_end,
15971601 bint na_filter, kh_str_t * na_hashset,
15981602 char * encoding):
15991603 " Convert column data into codes, categories"
@@ -1663,8 +1667,8 @@ cdef _categorical_convert(parser_t *parser, int col,
16631667 kh_destroy_str(table)
16641668 return np.asarray(codes), result, na_count
16651669
1666- cdef _to_fw_string(parser_t * parser, int col, int line_start,
1667- int line_end, size_t width):
1670+ cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
1671+ int64_t line_end, int64_t width):
16681672 cdef:
16691673 Py_ssize_t i
16701674 coliter_t it
@@ -1680,11 +1684,11 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start,
16801684
16811685 return result
16821686
1683- cdef inline void _to_fw_string_nogil(parser_t * parser, int col,
1684- int line_start, int line_end,
1687+ cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
1688+ int64_t line_start, int64_t line_end,
16851689 size_t width, char * data) nogil:
16861690 cdef:
1687- Py_ssize_t i
1691+ int64_t i
16881692 coliter_t it
16891693 const char * word = NULL
16901694
@@ -1699,7 +1703,8 @@ cdef char* cinf = b'inf'
16991703cdef char * cposinf = b' +inf'
17001704cdef char * cneginf = b' -inf'
17011705
1702- cdef _try_double(parser_t * parser, int col, int line_start, int line_end,
1706+ cdef _try_double(parser_t * parser, int64_t col,
1707+ int64_t line_start, int64_t line_end,
17031708 bint na_filter, kh_str_t * na_hashset, object na_flist):
17041709 cdef:
17051710 int error, na_count = 0
@@ -1808,7 +1813,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
18081813
18091814 return 0
18101815
1811- cdef _try_uint64(parser_t * parser, int col, int line_start, int line_end,
1816+ cdef _try_uint64(parser_t * parser, int64_t col,
1817+ int64_t line_start, int64_t line_end,
18121818 bint na_filter, kh_str_t * na_hashset):
18131819 cdef:
18141820 int error
@@ -1842,8 +1848,9 @@ cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end,
18421848
18431849 return result
18441850
1845- cdef inline int _try_uint64_nogil(parser_t * parser, int col, int line_start,
1846- int line_end, bint na_filter,
1851+ cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col,
1852+ int64_t line_start,
1853+ int64_t line_end, bint na_filter,
18471854 const kh_str_t * na_hashset,
18481855 uint64_t * data, uint_state * state) nogil:
18491856 cdef:
@@ -1879,7 +1886,8 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start,
18791886
18801887 return 0
18811888
1882- cdef _try_int64(parser_t * parser, int col, int line_start, int line_end,
1889+ cdef _try_int64(parser_t * parser, int64_t col,
1890+ int64_t line_start, int64_t line_end,
18831891 bint na_filter, kh_str_t * na_hashset):
18841892 cdef:
18851893 int error, na_count = 0
@@ -1906,8 +1914,9 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
19061914
19071915 return result, na_count
19081916
1909- cdef inline int _try_int64_nogil(parser_t * parser, int col, int line_start,
1910- int line_end, bint na_filter,
1917+ cdef inline int _try_int64_nogil(parser_t * parser, int64_t col,
1918+ int64_t line_start,
1919+ int64_t line_end, bint na_filter,
19111920 const kh_str_t * na_hashset, int64_t NA,
19121921 int64_t * data, int * na_count) nogil:
19131922 cdef:
@@ -1944,7 +1953,8 @@ cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start,
19441953
19451954 return 0
19461955
1947- cdef _try_bool(parser_t * parser, int col, int line_start, int line_end,
1956+ cdef _try_bool(parser_t * parser, int64_t col,
1957+ int64_t line_start, int64_t line_end,
19481958 bint na_filter, kh_str_t * na_hashset):
19491959 cdef:
19501960 int na_count
@@ -1966,8 +1976,9 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
19661976 return None , None
19671977 return result.view(np.bool_), na_count
19681978
1969- cdef inline int _try_bool_nogil(parser_t * parser, int col, int line_start,
1970- int line_end, bint na_filter,
1979+ cdef inline int _try_bool_nogil(parser_t * parser, int64_t col,
1980+ int64_t line_start,
1981+ int64_t line_end, bint na_filter,
19711982 const kh_str_t * na_hashset, uint8_t NA,
19721983 uint8_t * data, int * na_count) nogil:
19731984 cdef:
@@ -2006,7 +2017,8 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start,
20062017 data += 1
20072018 return 0
20082019
2009- cdef _try_bool_flex(parser_t * parser, int col, int line_start, int line_end,
2020+ cdef _try_bool_flex(parser_t * parser, int64_t col,
2021+ int64_t line_start, int64_t line_end,
20102022 bint na_filter, const kh_str_t * na_hashset,
20112023 const kh_str_t * true_hashset,
20122024 const kh_str_t * false_hashset):
@@ -2032,8 +2044,9 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
20322044 return None , None
20332045 return result.view(np.bool_), na_count
20342046
2035- cdef inline int _try_bool_flex_nogil(parser_t * parser, int col, int line_start,
2036- int line_end, bint na_filter,
2047+ cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col,
2048+ int64_t line_start,
2049+ int64_t line_end, bint na_filter,
20372050 const kh_str_t * na_hashset,
20382051 const kh_str_t * true_hashset,
20392052 const kh_str_t * false_hashset,
@@ -2251,8 +2264,8 @@ for k in list(na_values):
22512264 na_values[np.dtype(k)] = na_values[k]
22522265
22532266
2254- cdef _apply_converter(object f, parser_t * parser, int col,
2255- int line_start, int line_end,
2267+ cdef _apply_converter(object f, parser_t * parser, int64_t col,
2268+ int64_t line_start, int64_t line_end,
22562269 char * c_encoding):
22572270 cdef:
22582271 int error
@@ -2296,7 +2309,7 @@ def _to_structured_array(dict columns, object names, object usecols):
22962309
22972310 object name, fnames, field_type
22982311 Py_ssize_t i, offset, nfields, length
2299- int stride, elsize
2312+ int64_t stride, elsize
23002313 char * buf
23012314
23022315 if names is None :
@@ -2344,10 +2357,10 @@ def _to_structured_array(dict columns, object names, object usecols):
23442357
23452358 return recs
23462359
2347- cdef _fill_structured_column(char * dst, char * src, int elsize,
2348- int stride, int length, bint incref):
2360+ cdef _fill_structured_column(char * dst, char * src, int64_t elsize,
2361+ int64_t stride, int64_t length, bint incref):
23492362 cdef:
2350- Py_ssize_t i
2363+ int64_t i
23512364
23522365 if incref:
23532366 util.transfer_object_column(dst, src, stride, length)
0 commit comments