Skip to content

Commit dd5de76

Browse files
authored
Merge branch 'main' into renovate/all
2 parents 3e6c134 + bdd7c6c commit dd5de76

File tree

5 files changed

+111
-18
lines changed

5 files changed

+111
-18
lines changed

samples/snippets/snippets_test.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,11 @@
7272
import storage_set_bucket_default_kms_key
7373
import storage_set_client_endpoint
7474
import storage_set_metadata
75-
import storage_transfer_manager_download_all_blobs
75+
import storage_transfer_manager_download_bucket
7676
import storage_transfer_manager_download_chunks_concurrently
77+
import storage_transfer_manager_download_many
7778
import storage_transfer_manager_upload_directory
78-
import storage_transfer_manager_upload_many_blobs
79+
import storage_transfer_manager_upload_many
7980
import storage_upload_file
8081
import storage_upload_from_memory
8182
import storage_upload_from_stream
@@ -689,7 +690,7 @@ def test_transfer_manager_snippets(test_bucket, capsys):
689690
with open(os.path.join(uploads, name), "w") as f:
690691
f.write(name)
691692

692-
storage_transfer_manager_upload_many_blobs.upload_many_blobs_with_transfer_manager(
693+
storage_transfer_manager_upload_many.upload_many_blobs_with_transfer_manager(
693694
test_bucket.name,
694695
BLOB_NAMES,
695696
source_directory="{}/".format(uploads),
@@ -702,10 +703,24 @@ def test_transfer_manager_snippets(test_bucket, capsys):
702703

703704
with tempfile.TemporaryDirectory() as downloads:
704705
# Download the files.
705-
storage_transfer_manager_download_all_blobs.download_all_blobs_with_transfer_manager(
706+
storage_transfer_manager_download_bucket.download_bucket_with_transfer_manager(
706707
test_bucket.name,
707708
destination_directory=os.path.join(downloads, ""),
708709
processes=8,
710+
max_results=10000,
711+
)
712+
out, _ = capsys.readouterr()
713+
714+
for name in BLOB_NAMES:
715+
assert "Downloaded {}".format(name) in out
716+
717+
with tempfile.TemporaryDirectory() as downloads:
718+
# Download the files.
719+
storage_transfer_manager_download_many.download_many_blobs_with_transfer_manager(
720+
test_bucket.name,
721+
blob_names=BLOB_NAMES,
722+
destination_directory=os.path.join(downloads, ""),
723+
processes=8,
709724
)
710725
out, _ = capsys.readouterr()
711726

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Copyright 2022 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START storage_transfer_manager_download_bucket]
def download_bucket_with_transfer_manager(
    bucket_name, destination_directory="", processes=8, max_results=1000
):
    """Download all of the blobs in a bucket, concurrently in a process pool.

    The filename of each blob once downloaded is derived from the blob name and
    the `destination_directory` parameter. For complete control of the filename
    of each blob, use transfer_manager.download_many() instead.

    Directories will be created automatically as needed, for instance to
    accommodate blob names that include slashes.
    """

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The directory on your computer to which to download all of the files. This
    # string is prepended (with os.path.join()) to the name of each blob to form
    # the full path. Relative paths and absolute paths are both accepted. An
    # empty string means "the current working directory". Note that this
    # parameter accepts directory traversal ("../" etc.) and is not
    # intended for unsanitized end user input.
    # destination_directory = ""

    # The maximum number of processes to use for the operation. The performance
    # impact of this value depends on the use case, but smaller files usually
    # benefit from a higher number of processes. Each additional process occupies
    # some CPU and memory resources until finished.
    # processes=8

    # The maximum number of results to fetch from bucket.list_blobs(). This
    # sample code fetches all of the blobs up to max_results and queues them all
    # for download at once. Though they will still be executed in batches up to
    # the processes limit, queueing them all at once can be taxing on system
    # memory if buckets are very large. Adjust max_results as needed for your
    # system environment, or set it to None if you are sure the bucket is not
    # too large to hold in memory easily.
    # max_results=1000

    from google.cloud.storage import Client, transfer_manager

    storage_client = Client()
    bucket = storage_client.bucket(bucket_name)

    # List the bucket once up front so all names can be queued for download.
    blob_names = [blob.name for blob in bucket.list_blobs(max_results=max_results)]

    results = transfer_manager.download_many_to_path(
        bucket, blob_names, destination_directory=destination_directory, max_workers=processes
    )

    for name, result in zip(blob_names, results):
        # The results list is either `None` or an exception for each blob in
        # the input list, in order.

        if isinstance(result, Exception):
            print("Failed to download {} due to exception: {}".format(name, result))
        else:
            print("Downloaded {} to {}.".format(name, destination_directory + name))
# [END storage_transfer_manager_download_bucket]

samples/snippets/storage_transfer_manager_download_chunks_concurrently.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
# [START storage_transfer_manager_download_chunks_concurrently]
1616
def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8):
17-
"""Download a single file in chunks, concurrently."""
17+
"""Download a single file in chunks, concurrently in a process pool."""
1818

1919
# The ID of your GCS bucket
2020
# bucket_name = "your-bucket-name"

samples/snippets/storage_transfer_manager_download_all_blobs.py renamed to samples/snippets/storage_transfer_manager_download_many.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Google LLC
1+
# Copyright 2023 Google LLC
22
#
33
# Licensed under the Apache License, Version 2.0 (the 'License');
44
# you may not use this file except in compliance with the License.
@@ -12,23 +12,29 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
# [START storage_transfer_manager_download_all_blobs]
16-
def download_all_blobs_with_transfer_manager(
17-
bucket_name, destination_directory="", processes=8
15+
# [START storage_transfer_manager_download_many]
16+
def download_many_blobs_with_transfer_manager(
17+
bucket_name, blob_names, destination_directory="", processes=8
1818
):
19-
"""Download all of the blobs in a bucket, concurrently in a thread pool.
19+
"""Download blobs in a list by name, concurrently in a process pool.
2020
2121
The filename of each blob once downloaded is derived from the blob name and
2222
the `destination_directory` parameter. For complete control of the filename
2323
of each blob, use transfer_manager.download_many() instead.
2424
25-
Directories will be created automatically as needed, for instance to
26-
accommodate blob names that include slashes.
25+
Directories will be created automatically as needed to accommodate blob
26+
names that include slashes.
2727
"""
2828

2929
# The ID of your GCS bucket
3030
# bucket_name = "your-bucket-name"
3131

32+
# The list of blob names to download. The name of each blob will also
33+
# be the name of each destination file (use transfer_manager.download_many()
34+
# instead to control each destination file name). If there is a "/" in the
35+
# blob name, then corresponding directories will be created on download.
36+
# blob_names = ["myblob", "myblob2"]
37+
3238
# The directory on your computer to which to download all of the files. This
3339
# string is prepended (with os.path.join()) to the name of each blob to form
3440
# the full path. Relative paths and absolute paths are both accepted. An
@@ -48,8 +54,6 @@ def download_all_blobs_with_transfer_manager(
4854
storage_client = Client()
4955
bucket = storage_client.bucket(bucket_name)
5056

51-
blob_names = [blob.name for blob in bucket.list_blobs()]
52-
5357
results = transfer_manager.download_many_to_path(
5458
bucket, blob_names, destination_directory=destination_directory, max_workers=processes
5559
)
@@ -62,4 +66,4 @@ def download_all_blobs_with_transfer_manager(
6266
print("Failed to download {} due to exception: {}".format(name, result))
6367
else:
6468
print("Downloaded {} to {}.".format(name, destination_directory + name))
65-
# [END storage_transfer_manager_download_all_blobs]
69+
# [END storage_transfer_manager_download_many]

samples/snippets/storage_transfer_manager_upload_many_blobs.py renamed to samples/snippets/storage_transfer_manager_upload_many.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
# [START storage_transfer_manager_upload_many_blobs]
15+
# [START storage_transfer_manager_upload_many]
1616
def upload_many_blobs_with_transfer_manager(
1717
bucket_name, filenames, source_directory="", processes=8
1818
):
19-
"""Upload every file in a list to a bucket, concurrently in a thread pool.
19+
"""Upload every file in a list to a bucket, concurrently in a process pool.
2020
2121
Each blob name is derived from the filename, not including the
2222
`source_directory` parameter. For complete control of the blob name for each
@@ -63,4 +63,4 @@ def upload_many_blobs_with_transfer_manager(
6363
print("Failed to upload {} due to exception: {}".format(name, result))
6464
else:
6565
print("Uploaded {} to {}.".format(name, bucket.name))
66-
# [END storage_transfer_manager_upload_many_blobs]
66+
# [END storage_transfer_manager_upload_many]

0 commit comments

Comments
 (0)