Skip to content

BUG: func 'to_pickle' and 'read_pickle' were not accepting URL GH30299 #30302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@ I/O
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
-
- Bug in :func:`to_pickle` and :func:`read_pickle` where URLs were not accepted (:issue:`30299`)

Plotting
^^^^^^^^
Expand Down
65 changes: 49 additions & 16 deletions pandas/io/pickle.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
""" pickle compat """
import pickle
from typing import Any, Optional
import warnings

from pandas.compat import pickle_compat as pc

from pandas.io.common import _get_handle, _stringify_path
from pandas._typing import FilePathOrBuffer

from pandas.io.common import (
_get_handle,
get_filepath_or_buffer as _get_filepath_or_buffer,
)

def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):

def to_pickle(
obj: Any,
filepath_or_buffer: FilePathOrBuffer,
compression: Optional[str] = "infer",
protocol: int = pickle.HIGHEST_PROTOCOL,
):
"""
Pickle (serialize) object to file.

Expand All @@ -16,10 +27,12 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
obj : any object
Any python object.
filepath_or_buffer : str
File path where the pickled object will be stored.
File path, URL, or buffer where the pickled object will be stored.
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
A string representing the compression to use in the output file. By
default, infers from the file extension in specified path.
If 'infer' and 'filepath_or_buffer' is path-like, then detect compression
from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
no compression). If 'infer' and 'filepath_or_buffer' is not path-like,
then use None (= no compression).
protocol : int
Int which indicates which protocol should be used by the pickler,
default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
Expand Down Expand Up @@ -63,8 +76,12 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = _stringify_path(path)
f, fh = _get_handle(path, "wb", compression=compression, is_text=False)
fp_or_buf, _, compression, should_close = _get_filepath_or_buffer(
filepath_or_buffer, compression=compression, mode="wb"
)
if not isinstance(fp_or_buf, str) and compression == "infer":
compression = None
f, fh = _get_handle(fp_or_buf, "wb", compression=compression, is_text=False)
if protocol < 0:
protocol = pickle.HIGHEST_PROTOCOL
try:
Expand All @@ -73,9 +90,16 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL):
f.close()
for _f in fh:
_f.close()
if should_close:
try:
fp_or_buf.close()
except ValueError:
pass


def read_pickle(path, compression="infer"):
def read_pickle(
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] = "infer"
):
"""
Load pickled pandas object (or any object) from file.

Expand All @@ -86,13 +110,13 @@ def read_pickle(path, compression="infer"):

Parameters
----------
path : str
File path where the pickled object will be loaded.
filepath_or_buffer : str
File path, URL, or buffer where the pickled object will be loaded from.
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
For on-the-fly decompression of on-disk data. If 'infer', then use
gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
or '.zip' respectively, and no decompression otherwise.
Set to None for no decompression.
If 'infer' and 'filepath_or_buffer' is path-like, then detect compression
from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
no decompression). If 'infer' and 'filepath_or_buffer' is not path-like,
then use None (= no decompression).

Returns
-------
Expand Down Expand Up @@ -134,8 +158,12 @@ def read_pickle(path, compression="infer"):
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = _stringify_path(path)
f, fh = _get_handle(path, "rb", compression=compression, is_text=False)
fp_or_buf, _, compression, should_close = _get_filepath_or_buffer(
filepath_or_buffer, compression=compression
)
if not isinstance(fp_or_buf, str) and compression == "infer":
compression = None
f, fh = _get_handle(fp_or_buf, "rb", compression=compression, is_text=False)

# 1) try standard library Pickle
# 2) try pickle_compat (older pandas version) to handle subclass changes
Expand All @@ -159,3 +187,8 @@ def read_pickle(path, compression="infer"):
f.close()
for _f in fh:
_f.close()
if should_close:
try:
fp_or_buf.close()
except ValueError:
pass