-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Storage options #35381
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Storage options #35381
Changes from 22 commits
3a54dde
f0922c4
e549f8d
e8540c4
0034bff
19f041d
f9e1e69
7f69afe
cc0e4c3
e356e93
c7170dd
b96778d
1dc41b1
d882984
f1e455d
c88b75f
58481a4
704770b
1b8637e
bbcef17
a18686c
fa656cb
e8d5312
a79a274
97c7263
e99f8ed
23f4fc4
38a8330
32cf204
682e9e5
f7f086c
b5138f0
afdc030
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -265,6 +265,12 @@ SSH, FTP, dropbox and github. For docs and capabilities, see the `fsspec docs`_. | |
The existing capability to interface with S3 and GCS will be unaffected by this | ||
change, as ``fsspec`` will still bring in the same packages as before. | ||
|
||
Many read/write functions have acquired the ``storage_options`` optional argument, | ||
to pass a dictionary of parameters to the storage backend. This allows, for | ||
example, for passing credentials to S3 and GCS storage. The details of what | ||
parameters can be passed to which backends can be found in the documentation | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a link from fsspec docs to the storage back ends? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
of the individual storage backends. | ||
|
||
.. _Azure Datalake and Blob: https://github.com/dask/adlfs | ||
|
||
.. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1224,3 +1224,21 @@ def sort_by_key(request): | |
Tests None (no key) and the identity key. | ||
""" | ||
return request.param | ||
|
||
|
||
@pytest.fixture() | ||
def fsspectest(): | ||
pytest.importorskip("fsspec") | ||
from fsspec.implementations.memory import MemoryFileSystem | ||
from fsspec import register_implementation | ||
|
||
class TestMemoryFS(MemoryFileSystem): | ||
protocol = "testmem" | ||
test = [None] | ||
|
||
def __init__(self, **kwargs): | ||
self.test[0] = kwargs.pop("test", None) | ||
super().__init__(**kwargs) | ||
|
||
register_implementation("testmem", TestMemoryFS, True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keyword for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you need to deregister the implementation as a teardown? Does any state leak between tests? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really, but done now anyway. The tests were only seeing that the |
||
return TestMemoryFS() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2055,6 +2055,7 @@ def to_stata( | |
version: Optional[int] = 114, | ||
convert_strl: Optional[Sequence[Label]] = None, | ||
compression: Union[str, Mapping[str, str], None] = "infer", | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
) -> None: | ||
""" | ||
Export DataFrame object to Stata dta format. | ||
|
@@ -2131,6 +2132,16 @@ def to_stata( | |
|
||
.. versionadded:: 1.1.0 | ||
|
||
storage_options : dict, optional | ||
Extra options that make sense for a particular storage connection, e.g. | ||
host, port, username, password, etc., if using a URL that will | ||
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error | ||
will be raised if providing this argument with a local path or | ||
a file-like buffer. See the fsspec and backend storage implementation | ||
docs for the set of allowed keys and values. | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
.. versionadded:: 1.2.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1.2.0 |
||
|
||
Raises | ||
------ | ||
NotImplementedError | ||
|
@@ -2187,6 +2198,7 @@ def to_stata( | |
write_index=write_index, | ||
variable_labels=variable_labels, | ||
compression=compression, | ||
storage_options=storage_options, | ||
**kwargs, | ||
) | ||
writer.write_file() | ||
|
@@ -2239,9 +2251,10 @@ def to_feather(self, path, **kwargs) -> None: | |
) | ||
def to_markdown( | ||
self, | ||
buf: Optional[IO[str]] = None, | ||
mode: Optional[str] = None, | ||
buf: Optional[Union[IO[str], str]] = None, | ||
mode: str = "wt", | ||
index: bool = True, | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
**kwargs, | ||
) -> Optional[str]: | ||
if "showindex" in kwargs: | ||
|
@@ -2259,9 +2272,14 @@ def to_markdown( | |
result = tabulate.tabulate(self, **kwargs) | ||
if buf is None: | ||
return result | ||
buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode) | ||
buf, _, _, should_close = get_filepath_or_buffer( | ||
buf, mode=mode, storage_options=storage_options | ||
) | ||
assert buf is not None # Help mypy. | ||
assert not isinstance(buf, str) | ||
buf.writelines(result) | ||
if should_close: | ||
buf.close() | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return None | ||
|
||
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path") | ||
|
@@ -2272,6 +2290,7 @@ def to_parquet( | |
compression: Optional[str] = "snappy", | ||
index: Optional[bool] = None, | ||
partition_cols: Optional[List[str]] = None, | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
**kwargs, | ||
) -> None: | ||
""" | ||
|
@@ -2320,6 +2339,16 @@ def to_parquet( | |
|
||
.. versionadded:: 0.24.0 | ||
|
||
storage_options : dict, optional | ||
Extra options that make sense for a particular storage connection, e.g. | ||
host, port, username, password, etc., if using a URL that will | ||
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error | ||
will be raised if providing this argument with a local path or | ||
a file-like buffer. See the fsspec and backend storage implementation | ||
docs for the set of allowed keys and values. | ||
|
||
.. versionadded:: 1.2.0 | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
**kwargs | ||
Additional arguments passed to the parquet library. See | ||
:ref:`pandas io <io.parquet>` for more details. | ||
|
@@ -2366,6 +2395,7 @@ def to_parquet( | |
compression=compression, | ||
index=index, | ||
partition_cols=partition_cols, | ||
storage_options=storage_options, | ||
**kwargs, | ||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2042,6 +2042,7 @@ def to_json( | |
compression: Optional[str] = "infer", | ||
index: bool_t = True, | ||
indent: Optional[int] = None, | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
) -> Optional[str]: | ||
""" | ||
Convert the object to a JSON string. | ||
|
@@ -2125,6 +2126,16 @@ def to_json( | |
|
||
.. versionadded:: 1.0.0 | ||
|
||
storage_options : dict, optional | ||
Extra options that make sense for a particular storage connection, e.g. | ||
host, port, username, password, etc., if using a URL that will | ||
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error | ||
will be raised if providing this argument with a local path or | ||
a file-like buffer. See the fsspec and backend storage implementation | ||
docs for the set of allowed keys and values. | ||
|
||
.. versionadded:: 1.2.0 | ||
|
||
Returns | ||
------- | ||
None or str | ||
|
@@ -2303,6 +2314,7 @@ def to_json( | |
compression=compression, | ||
index=index, | ||
indent=indent, | ||
storage_options=storage_options, | ||
) | ||
|
||
def to_hdf( | ||
|
@@ -2617,6 +2629,7 @@ def to_pickle( | |
path, | ||
compression: Optional[str] = "infer", | ||
protocol: int = pickle.HIGHEST_PROTOCOL, | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
) -> None: | ||
""" | ||
Pickle (serialize) object to file. | ||
|
@@ -2637,6 +2650,16 @@ def to_pickle( | |
|
||
.. [1] https://docs.python.org/3/library/pickle.html. | ||
|
||
storage_options : dict, optional | ||
Extra options that make sense for a particular storage connection, e.g. | ||
host, port, username, password, etc., if using a URL that will | ||
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error | ||
will be raised if providing this argument with a local path or | ||
a file-like buffer. See the fsspec and backend storage implementation | ||
docs for the set of allowed keys and values. | ||
|
||
.. versionadded:: 1.2.0 | ||
|
||
See Also | ||
-------- | ||
read_pickle : Load pickled pandas object (or any object) from file. | ||
|
@@ -2670,7 +2693,13 @@ def to_pickle( | |
""" | ||
from pandas.io.pickle import to_pickle | ||
|
||
to_pickle(self, path, compression=compression, protocol=protocol) | ||
to_pickle( | ||
self, | ||
path, | ||
compression=compression, | ||
protocol=protocol, | ||
storage_options=storage_options, | ||
) | ||
|
||
def to_clipboard( | ||
self, excel: bool_t = True, sep: Optional[str] = None, **kwargs | ||
|
@@ -3010,6 +3039,7 @@ def to_csv( | |
escapechar: Optional[str] = None, | ||
decimal: Optional[str] = ".", | ||
errors: str = "strict", | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
) -> Optional[str]: | ||
r""" | ||
Write object to a comma-separated values (csv) file. | ||
|
@@ -3109,6 +3139,14 @@ def to_csv( | |
See the errors argument for :func:`open` for a full list | ||
of options. | ||
|
||
storage_options : dict, optional | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i wonder if there is a way to share doc-strings components for all of these i/o methods There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not aware of a way There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah i think we can do this with our shared docs infra, but out of scope for now |
||
Extra options that make sense for a particular storage connection, e.g. | ||
host, port, username, password, etc., if using a URL that will | ||
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error | ||
will be raised if providing this argument with a local path or | ||
a file-like buffer. See the fsspec and backend storage implementation | ||
docs for the set of allowed keys and values. | ||
|
||
.. versionadded:: 1.2.0 | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Returns | ||
|
@@ -3163,6 +3201,7 @@ def to_csv( | |
doublequote=doublequote, | ||
escapechar=escapechar, | ||
decimal=decimal, | ||
storage_options=storage_options, | ||
) | ||
formatter.save() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
import csv as csvlib | ||
from io import StringIO | ||
import os | ||
from typing import Hashable, List, Mapping, Optional, Sequence, Union | ||
from typing import Any, Dict, Hashable, List, Mapping, Optional, Sequence, Union | ||
import warnings | ||
from zipfile import ZipFile | ||
|
||
|
@@ -54,6 +54,7 @@ def __init__( | |
doublequote: bool = True, | ||
escapechar: Optional[str] = None, | ||
decimal=".", | ||
storage_options: Optional[Dict[str, Any]] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe add this to typing.py, e.g. StorageOptions=.... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Up to you There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am leaving it for now, but can do as you suggest if you request it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seeing as we are doing this in lots of places I would add it |
||
): | ||
self.obj = obj | ||
|
||
|
@@ -64,7 +65,11 @@ def __init__( | |
compression, self.compression_args = get_compression_method(compression) | ||
|
||
self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer( | ||
path_or_buf, encoding=encoding, compression=compression, mode=mode | ||
path_or_buf, | ||
encoding=encoding, | ||
compression=compression, | ||
mode=mode, | ||
storage_options=storage_options, | ||
) | ||
self.sep = sep | ||
self.na_rep = na_rep | ||
|
Uh oh!
There was an error while loading. Please reload this page.