pandas-dev · jreback · Jan 25, 2018 · Jan 24, 2018 · jreback · Jan 25, 2018
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -7,9 +7,8 @@ import warnings
 
 from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
 
-from libc.stdio cimport fopen, fclose
-from libc.stdlib cimport malloc, free
-from libc.string cimport strncpy, strlen, strcmp, strcasecmp
+from libc.stdlib cimport free
+from libc.string cimport strncpy, strlen, strcasecmp
 
 cimport cython
 from cython cimport Py_ssize_t
@@ -27,9 +26,6 @@ cdef extern from "Python.h":
     object PyUnicode_Decode(char *v, Py_ssize_t size, char *encoding,
                             char *errors)
 
-cdef extern from "stdlib.h":
-    void memcpy(void *dst, void *src, size_t n)
-
 
 import numpy as np
 cimport numpy as cnp
@@ -50,7 +46,7 @@ from khash cimport (
 
 import pandas.compat as compat
 from pandas.core.dtypes.common import (
-    is_categorical_dtype, CategoricalDtype,
+    is_categorical_dtype,
     is_integer_dtype, is_float_dtype,
     is_bool_dtype, is_object_dtype,
     is_datetime64_dtype,
@@ -90,9 +86,6 @@ try:
 except NameError:
     basestring = str
 
-cdef extern from "src/numpy_helper.h":
-    void transfer_object_column(char *dst, char *src, size_t stride,
-                                size_t length)
 
 cdef extern from "parser/tokenizer.h":
 
@@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h":
 
     int parser_trim_buffers(parser_t *self)
 
-    void debug_print_parser(parser_t *self)
-
     int tokenize_all_rows(parser_t *self) nogil
     int tokenize_nrows(parser_t *self, size_t nrows) nogil
 
@@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h":
     double round_trip(const char *p, char **q, char decimal, char sci,
                       char tsep, int skip_trailing) nogil
 
-    int to_longlong(char *item, long long *p_value) nogil
     int to_boolean(const char *item, uint8_t *val) nogil
 
 
@@ -875,9 +865,6 @@ cdef class TextReader:
 
         return header, field_count
 
-    cdef _implicit_index_count(self):
-        pass
-
     def read(self, rows=None):
         """
         rows=None --> read all rows
@@ -997,9 +984,6 @@ cdef class TextReader:
 
         return columns
 
-    def debug_print(self):
-        debug_print_parser(self.parser)
-
     cdef _start_clock(self):
         self.clocks.append(time.time())
 
@@ -1346,6 +1330,7 @@ cdef class TextReader:
             else:
                 return None
 
+
 cdef object _true_values = [b'True', b'TRUE', b'true']
 cdef object _false_values = [b'False', b'FALSE', b'false']
 
@@ -1375,21 +1360,6 @@ cdef asbytes(object o):
 _NA_VALUES = _ensure_encoded(list(com._NA_VALUES))
 
 
-def _is_file_like(obj):
-    if PY3:
-        import io
-        if isinstance(obj, io.TextIOWrapper):
-            raise ParserError('Cannot handle open unicode files (yet)')
-
-        # BufferedReader is a byte reader for Python 3
-        file = io.BufferedReader
-    else:
-        import __builtin__
-        file = __builtin__.file
-
-    return isinstance(obj, (basestring, file))
-
-
 def _maybe_upcast(arr):
     """
 
@@ -1479,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,
 
     return result, na_count
 
+
 cdef _string_box_utf8(parser_t *parser, int64_t col,
                       int64_t line_start, int64_t line_end,
                       bint na_filter, kh_str_t *na_hashset):
@@ -1532,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
 
     return result, na_count
 
+
 cdef _string_box_decode(parser_t *parser, int64_t col,
                         int64_t line_start, int64_t line_end,
                         bint na_filter, kh_str_t *na_hashset,
@@ -1662,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
     kh_destroy_str(table)
     return np.asarray(codes), result, na_count
 
+
 cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
                    int64_t line_end, int64_t width):
     cdef:
@@ -1679,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
 
     return result
 
+
 cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
                                      int64_t line_start, int64_t line_end,
                                      size_t width, char *data) nogil:
@@ -1694,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
         strncpy(data, word, width)
         data += width
 
+
 cdef char* cinf = b'inf'
 cdef char* cposinf = b'+inf'
 cdef char* cneginf = b'-inf'
 
+
 cdef _try_double(parser_t *parser, int64_t col,
                  int64_t line_start, int64_t line_end,
                  bint na_filter, kh_str_t *na_hashset, object na_flist):
@@ -1738,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col,
         return None, None
     return result, na_count
 
+
 cdef inline int _try_double_nogil(parser_t *parser,
                                   double (*double_converter)(
                                       const char *, char **, char,
@@ -1808,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
 
     return 0
 
+
 cdef _try_uint64(parser_t *parser, int64_t col,
                  int64_t line_start, int64_t line_end,
                  bint na_filter, kh_str_t *na_hashset):
@@ -1843,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col,
 
     return result
 
+
 cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
                                   int64_t line_start,
                                   int64_t line_end, bint na_filter,
@@ -1881,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
 
     return 0
 
+
 cdef _try_int64(parser_t *parser, int64_t col,
                 int64_t line_start, int64_t line_end,
                 bint na_filter, kh_str_t *na_hashset):
@@ -1909,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col,
 
     return result, na_count
 
+
 cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
                                  int64_t line_start,
                                  int64_t line_end, bint na_filter,
@@ -1948,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
 
     return 0
 
-cdef _try_bool(parser_t *parser, int64_t col,
-               int64_t line_start, int64_t line_end,
-               bint na_filter, kh_str_t *na_hashset):
-    cdef:
-        int na_count
-        Py_ssize_t lines = line_end - line_start
-        uint8_t *data
-        cnp.ndarray[cnp.uint8_t, ndim=1] result
-
-        uint8_t NA = na_values[np.bool_]
-
-    result = np.empty(lines)
-    data = <uint8_t *> result.data
-
-    with nogil:
-        error = _try_bool_nogil(parser, col, line_start,
-                                line_end, na_filter,
-                                na_hashset, NA, data,
-                                &na_count)
-    if error != 0:
-        return None, None
-    return result.view(np.bool_), na_count
-
-cdef inline int _try_bool_nogil(parser_t *parser, int64_t col,
-                                int64_t line_start,
-                                int64_t line_end, bint na_filter,
-                                const kh_str_t *na_hashset, uint8_t NA,
-                                uint8_t *data, int *na_count) nogil:
-    cdef:
-        int error
-        Py_ssize_t i, lines = line_end - line_start
-        coliter_t it
-        const char *word = NULL
-        khiter_t k
-    na_count[0] = 0
-
-    coliter_setup(&it, parser, col, line_start)
-
-    if na_filter:
-        for i in range(lines):
-            COLITER_NEXT(it, word)
-
-            k = kh_get_str(na_hashset, word)
-            # in the hash table
-            if k != na_hashset.n_buckets:
-                na_count[0] += 1
-                data[0] = NA
-                data += 1
-                continue
-
-            error = to_boolean(word, data)
-            if error != 0:
-                return error
-            data += 1
-    else:
-        for i in range(lines):
-            COLITER_NEXT(it, word)
-
-            error = to_boolean(word, data)
-            if error != 0:
-                return error
-            data += 1
-    return 0
 
 cdef _try_bool_flex(parser_t *parser, int64_t col,
                     int64_t line_start, int64_t line_end,
@@ -2039,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col,
         return None, None
     return result.view(np.bool_), na_count
 
+
 cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col,
                                      int64_t line_start,
                                      int64_t line_end, bint na_filter,
@@ -2131,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
 
     return table
 
+
 cdef kh_float64_t* kset_float64_from_list(values) except NULL:
     # caller takes responsibility for freeing the hash table
     cdef:

diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h
@@ -75,22 +75,6 @@ PANDAS_INLINE PyObject* char_to_string(char* data) {
 #endif
 }
 
-void transfer_object_column(char* dst, char* src, size_t stride,
-                            size_t length) {
-    size_t i;
-    size_t sz = sizeof(PyObject*);
-
-    for (i = 0; i < length; ++i) {
-        // uninitialized data
-
-        // Py_XDECREF(*((PyObject**) dst));
-
-        memcpy(dst, src, sz);
-        Py_INCREF(*((PyObject**)dst));
-        src += sz;
-        dst += stride;
-    }
-}
 
 void set_array_not_contiguous(PyArrayObject* ao) {
     ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);

diff --git a/pandas/_libs/src/parser/.gitignore b/pandas/_libs/src/parser/.gitignore
diff --git a/pandas/_libs/src/parser/Makefile b/pandas/_libs/src/parser/Makefile
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -1317,21 +1317,6 @@ int parser_trim_buffers(parser_t *self) {
     return 0;
 }
 
-void debug_print_parser(parser_t *self) {
-    int64_t j, line;
-    char *token;
-
-    for (line = 0; line < self->lines; ++line) {
-        printf("(Parsed) Line %lld: ", (long long)line);
-
-        for (j = 0; j < self->line_fields[j]; ++j) {
-            token = self->words[j + self->line_start[line]];
-            printf("%s ", token);
-        }
-        printf("\n");
-    }
-}
-
 /*
   nrows : number of rows to tokenize (or until reach EOF)
   all : tokenize all the data vs. certain number of rows

diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h
@@ -247,8 +247,6 @@ void parser_del(parser_t *self);
 
 void parser_set_default_options(parser_t *self);
 
-void debug_print_parser(parser_t *self);
-
 int tokenize_nrows(parser_t *self, size_t nrows);
 
 int tokenize_all_rows(parser_t *self);