Skip to content

Remove unused from _libs.parsers #19380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 17 additions & 97 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ import warnings

from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE

from libc.stdio cimport fopen, fclose
from libc.stdlib cimport malloc, free
from libc.string cimport strncpy, strlen, strcmp, strcasecmp
from libc.stdlib cimport free
from libc.string cimport strncpy, strlen, strcasecmp

cimport cython
from cython cimport Py_ssize_t
Expand All @@ -27,9 +26,6 @@ cdef extern from "Python.h":
object PyUnicode_Decode(char *v, Py_ssize_t size, char *encoding,
char *errors)

cdef extern from "stdlib.h":
void memcpy(void *dst, void *src, size_t n)


import numpy as np
cimport numpy as cnp
Expand All @@ -50,7 +46,7 @@ from khash cimport (

import pandas.compat as compat
from pandas.core.dtypes.common import (
is_categorical_dtype, CategoricalDtype,
is_categorical_dtype,
is_integer_dtype, is_float_dtype,
is_bool_dtype, is_object_dtype,
is_datetime64_dtype,
Expand Down Expand Up @@ -90,9 +86,6 @@ try:
except NameError:
basestring = str

cdef extern from "src/numpy_helper.h":
void transfer_object_column(char *dst, char *src, size_t stride,
size_t length)

cdef extern from "parser/tokenizer.h":

Expand Down Expand Up @@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h":

int parser_trim_buffers(parser_t *self)

void debug_print_parser(parser_t *self)

int tokenize_all_rows(parser_t *self) nogil
int tokenize_nrows(parser_t *self, size_t nrows) nogil

Expand All @@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h":
double round_trip(const char *p, char **q, char decimal, char sci,
char tsep, int skip_trailing) nogil

int to_longlong(char *item, long long *p_value) nogil
int to_boolean(const char *item, uint8_t *val) nogil


Expand Down Expand Up @@ -875,9 +865,6 @@ cdef class TextReader:

return header, field_count

cdef _implicit_index_count(self):
pass

def read(self, rows=None):
"""
rows=None --> read all rows
Expand Down Expand Up @@ -997,9 +984,6 @@ cdef class TextReader:

return columns

def debug_print(self):
debug_print_parser(self.parser)

cdef _start_clock(self):
self.clocks.append(time.time())

Expand Down Expand Up @@ -1346,6 +1330,7 @@ cdef class TextReader:
else:
return None


cdef object _true_values = [b'True', b'TRUE', b'true']
cdef object _false_values = [b'False', b'FALSE', b'false']

Expand Down Expand Up @@ -1375,21 +1360,6 @@ cdef asbytes(object o):
_NA_VALUES = _ensure_encoded(list(com._NA_VALUES))


def _is_file_like(obj):
if PY3:
import io
if isinstance(obj, io.TextIOWrapper):
raise ParserError('Cannot handle open unicode files (yet)')

# BufferedReader is a byte reader for Python 3
file = io.BufferedReader
else:
import __builtin__
file = __builtin__.file

return isinstance(obj, (basestring, file))


def _maybe_upcast(arr):
"""

Expand Down Expand Up @@ -1479,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,

return result, na_count


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we not linting these .pyx for the 2 blank lines between functions?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flake8 chokes on a SyntaxError as soon as it hits a cimport

cdef _string_box_utf8(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
Expand Down Expand Up @@ -1532,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,

return result, na_count


cdef _string_box_decode(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset,
Expand Down Expand Up @@ -1662,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
kh_destroy_str(table)
return np.asarray(codes), result, na_count


cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
int64_t line_end, int64_t width):
cdef:
Expand All @@ -1679,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,

return result


cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
size_t width, char *data) nogil:
Expand All @@ -1694,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
strncpy(data, word, width)
data += width


cdef char* cinf = b'inf'
cdef char* cposinf = b'+inf'
cdef char* cneginf = b'-inf'


cdef _try_double(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset, object na_flist):
Expand Down Expand Up @@ -1738,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col,
return None, None
return result, na_count


cdef inline int _try_double_nogil(parser_t *parser,
double (*double_converter)(
const char *, char **, char,
Expand Down Expand Up @@ -1808,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser,

return 0


cdef _try_uint64(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
Expand Down Expand Up @@ -1843,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col,

return result


cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
int64_t line_start,
int64_t line_end, bint na_filter,
Expand Down Expand Up @@ -1881,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,

return 0


cdef _try_int64(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
Expand Down Expand Up @@ -1909,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col,

return result, na_count


cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
int64_t line_start,
int64_t line_end, bint na_filter,
Expand Down Expand Up @@ -1948,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,

return 0

cdef _try_bool(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
cdef:
int na_count
Py_ssize_t lines = line_end - line_start
uint8_t *data
cnp.ndarray[cnp.uint8_t, ndim=1] result

uint8_t NA = na_values[np.bool_]

result = np.empty(lines)
data = <uint8_t *> result.data

with nogil:
error = _try_bool_nogil(parser, col, line_start,
line_end, na_filter,
na_hashset, NA, data,
&na_count)
if error != 0:
return None, None
return result.view(np.bool_), na_count

cdef inline int _try_bool_nogil(parser_t *parser, int64_t col,
int64_t line_start,
int64_t line_end, bint na_filter,
const kh_str_t *na_hashset, uint8_t NA,
uint8_t *data, int *na_count) nogil:
cdef:
int error
Py_ssize_t i, lines = line_end - line_start
coliter_t it
const char *word = NULL
khiter_t k
na_count[0] = 0

coliter_setup(&it, parser, col, line_start)

if na_filter:
for i in range(lines):
COLITER_NEXT(it, word)

k = kh_get_str(na_hashset, word)
# in the hash table
if k != na_hashset.n_buckets:
na_count[0] += 1
data[0] = NA
data += 1
continue

error = to_boolean(word, data)
if error != 0:
return error
data += 1
else:
for i in range(lines):
COLITER_NEXT(it, word)

error = to_boolean(word, data)
if error != 0:
return error
data += 1
return 0

cdef _try_bool_flex(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
Expand Down Expand Up @@ -2039,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col,
return None, None
return result.view(np.bool_), na_count


cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col,
int64_t line_start,
int64_t line_end, bint na_filter,
Expand Down Expand Up @@ -2131,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:

return table


cdef kh_float64_t* kset_float64_from_list(values) except NULL:
# caller takes responsibility for freeing the hash table
cdef:
Expand Down
16 changes: 0 additions & 16 deletions pandas/_libs/src/numpy_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,22 +75,6 @@ PANDAS_INLINE PyObject* char_to_string(char* data) {
#endif
}

void transfer_object_column(char* dst, char* src, size_t stride,
size_t length) {
size_t i;
size_t sz = sizeof(PyObject*);

for (i = 0; i < length; ++i) {
// uninitialized data

// Py_XDECREF(*((PyObject**) dst));

memcpy(dst, src, sz);
Py_INCREF(*((PyObject**)dst));
src += sz;
dst += stride;
}
}

void set_array_not_contiguous(PyArrayObject* ao) {
ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/src/parser/.gitignore

This file was deleted.

13 changes: 0 additions & 13 deletions pandas/_libs/src/parser/Makefile

This file was deleted.

15 changes: 0 additions & 15 deletions pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1317,21 +1317,6 @@ int parser_trim_buffers(parser_t *self) {
return 0;
}

void debug_print_parser(parser_t *self) {
int64_t j, line;
char *token;

for (line = 0; line < self->lines; ++line) {
printf("(Parsed) Line %lld: ", (long long)line);

for (j = 0; j < self->line_fields[j]; ++j) {
token = self->words[j + self->line_start[line]];
printf("%s ", token);
}
printf("\n");
}
}

/*
nrows : number of rows to tokenize (or until reach EOF)
all : tokenize all the data vs. certain number of rows
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/src/parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,6 @@ void parser_del(parser_t *self);

void parser_set_default_options(parser_t *self);

void debug_print_parser(parser_t *self);

int tokenize_nrows(parser_t *self, size_t nrows);

int tokenize_all_rows(parser_t *self);
Expand Down