-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG/ENH: consistent gzip compression arguments #35645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,13 +3,13 @@ | |
from io import BytesIO, StringIO | ||
from itertools import islice | ||
import os | ||
from typing import Any, Callable, Optional, Type | ||
from typing import IO, Any, Callable, List, Optional, Type | ||
|
||
import numpy as np | ||
|
||
import pandas._libs.json as json | ||
from pandas._libs.tslibs import iNaT | ||
from pandas._typing import JSONSerializable, StorageOptions | ||
from pandas._typing import CompressionOptions, JSONSerializable, StorageOptions | ||
from pandas.errors import AbstractMethodError | ||
from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments | ||
|
||
|
@@ -19,7 +19,12 @@ | |
from pandas.core.construction import create_series_with_explicit_dtype | ||
from pandas.core.reshape.concat import concat | ||
|
||
from pandas.io.common import get_filepath_or_buffer, get_handle, infer_compression | ||
from pandas.io.common import ( | ||
get_compression_method, | ||
get_filepath_or_buffer, | ||
get_handle, | ||
infer_compression, | ||
) | ||
from pandas.io.json._normalize import convert_to_line_delimits | ||
from pandas.io.json._table_schema import build_table_schema, parse_table_schema | ||
from pandas.io.parsers import _validate_integer | ||
|
@@ -41,7 +46,7 @@ def to_json( | |
date_unit: str = "ms", | ||
default_handler: Optional[Callable[[Any], JSONSerializable]] = None, | ||
lines: bool = False, | ||
compression: Optional[str] = "infer", | ||
compression: CompressionOptions = "infer", | ||
index: bool = True, | ||
indent: int = 0, | ||
storage_options: StorageOptions = None, | ||
|
@@ -369,7 +374,7 @@ def read_json( | |
encoding=None, | ||
lines: bool = False, | ||
chunksize: Optional[int] = None, | ||
compression="infer", | ||
compression: CompressionOptions = "infer", | ||
nrows: Optional[int] = None, | ||
storage_options: StorageOptions = None, | ||
): | ||
|
@@ -607,7 +612,9 @@ def read_json( | |
if encoding is None: | ||
encoding = "utf-8" | ||
|
||
compression = infer_compression(path_or_buf, compression) | ||
compression_method, compression = get_compression_method(compression) | ||
compression_method = infer_compression(path_or_buf, compression_method) | ||
compression = dict(compression, method=compression_method) | ||
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( | ||
path_or_buf, | ||
encoding=encoding, | ||
|
@@ -667,10 +674,13 @@ def __init__( | |
encoding, | ||
lines: bool, | ||
chunksize: Optional[int], | ||
compression, | ||
compression: CompressionOptions, | ||
nrows: Optional[int], | ||
): | ||
|
||
compression_method, compression = get_compression_method(compression) | ||
compression = dict(compression, method=compression_method) | ||
|
||
self.orient = orient | ||
self.typ = typ | ||
self.dtype = dtype | ||
|
@@ -687,6 +697,7 @@ def __init__( | |
self.nrows_seen = 0 | ||
self.should_close = False | ||
self.nrows = nrows | ||
self.file_handles: List[IO] = [] | ||
|
||
if self.chunksize is not None: | ||
self.chunksize = _validate_integer("chunksize", self.chunksize, 1) | ||
|
@@ -735,8 +746,8 @@ def _get_data_from_filepath(self, filepath_or_buffer): | |
except (TypeError, ValueError): | ||
pass | ||
|
||
if exists or self.compression is not None: | ||
data, _ = get_handle( | ||
if exists or self.compression["method"] is not None: | ||
data, self.file_handles = get_handle( | ||
filepath_or_buffer, | ||
"r", | ||
encoding=self.encoding, | ||
|
@@ -816,6 +827,8 @@ def close(self): | |
self.open_stream.close() | ||
except (IOError, AttributeError): | ||
pass | ||
for file_handle in self.file_handles: | ||
file_handle.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably unrelated to the recent CI issues, but we should definitely close those handles. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, is there a ResourceWarning? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I haven't seen any when reading/writing JSON files. |
||
|
||
def __next__(self): | ||
if self.nrows: | ||
|
Uh oh!
There was an error while loading. Please reload this page.