|
| 1 | +"""Async client for SDK downloads""" |
| 2 | + |
import asyncio
import os
from urllib.parse import quote

import aiofiles
import aiofiles.os

from google.cloud.storage import blob
from google.cloud.storage._experimental.asyncio.json import _helpers
from google.cloud.storage._experimental.asyncio.json import download
from google.cloud.storage._helpers import _DEFAULT_SCHEME
from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN
from google.cloud.storage._helpers import _STORAGE_HOST_TEMPLATE
| 13 | + |
| 14 | + |
| 15 | +_SLICED_DOWNLOAD_THRESHOLD = 1024*1024*1024 # 1GB |
| 16 | +_SLICED_DOWNLOAD_PARTS = 5 |
| 17 | +_USERAGENT = 'test-prototype' |
| 18 | + |
| 19 | + |
class AsyncClient(_helpers.ClientWithProjectAsync):
    """Asynchronous client for downloading GCS objects.

    Streams objects sequentially by default; for large objects with checksum
    validation disabled (and no caller-supplied byte range), downloads five
    ranges of the object in parallel into the same file ("sliced" download).
    """

    # OAuth2 scopes this client may request.
    SCOPE = (
        "https://www.googleapis.com/auth/devstorage.full_control",
        "https://www.googleapis.com/auth/devstorage.read_only",
        "https://www.googleapis.com/auth/devstorage.read_write",
    )

    @property
    def api_endpoint(self):
        """The JSON API endpoint for the default universe domain."""
        return _DEFAULT_SCHEME + _STORAGE_HOST_TEMPLATE.format(
            universe_domain=_DEFAULT_UNIVERSE_DOMAIN
        )

    def _get_download_url(self, blob_obj):
        """Build the ``alt=media`` download URL for ``blob_obj``.

        The object name is URL-encoded (``safe=b"~"``) so names containing
        ``/``, spaces, or other reserved characters no longer yield a
        malformed request path.
        """
        object_name = quote(blob_obj.name, safe=b"~")
        return (
            f"{self.api_endpoint}/download/storage/v1/b/"
            f"{blob_obj.bucket.name}/o/{object_name}?alt=media"
        )

    async def _perform_download(
        self,
        transport,
        file_obj,
        download_url,
        headers,
        start=None,
        end=None,
        timeout=None,
        checksum="md5",
        retry=_helpers.DEFAULT_ASYNC_RETRY,
        sequential_read=False,
    ):
        """Download bytes ``[start, end]`` of ``download_url`` into ``file_obj``.

        Thin wrapper over ``download.DownloadAsync``; ``start``/``end`` of
        ``None`` means the whole object.
        """
        download_obj = download.DownloadAsync(
            download_url,
            stream=file_obj,
            headers=headers,
            start=start,
            end=end,
            checksum=checksum,
            retry=retry,
            sequential_read=sequential_read,
        )
        await download_obj.consume(transport, timeout=timeout)

    def _check_if_sliced_download_is_eligible(self, obj_size, checksum):
        """Return True when a sliced (parallel) download may be used.

        Eligible only for objects of at least ``_SLICED_DOWNLOAD_THRESHOLD``
        bytes with checksum validation disabled.
        """
        if obj_size < _SLICED_DOWNLOAD_THRESHOLD:
            return False
        # Need to support checksum validations for parallel downloads.
        # Fixed: identity comparison instead of ``checksum == None``.
        return checksum is None

    async def download_to_file(
        self,
        blob_obj,
        filename,
        start=None,
        end=None,
        timeout=None,
        checksum="md5",
        retry=_helpers.DEFAULT_ASYNC_RETRY,
        sequential_read=False,
    ):
        """Download ``blob_obj`` into the local file ``filename``.

        Uses a sliced download for large, checksum-free, full-object reads;
        otherwise streams sequentially. On ``DataCorruption``/``NotFound``
        the partially written file is removed and the exception re-raised;
        ``InvalidResponse`` is translated via
        ``blob._raise_from_invalid_response``.
        """
        download_url = self._get_download_url(blob_obj)
        headers = blob._get_encryption_headers(blob_obj._encryption_key)
        headers["accept-encoding"] = "gzip"
        headers = {
            **blob._get_default_headers(_USERAGENT),
            **headers,
        }

        transport = self._async_http
        if not blob_obj.size:
            # NOTE(review): ``reload()`` is a synchronous, blocking HTTP call
            # inside an async method — confirm whether an async metadata
            # fetch is available here.
            blob_obj.reload()
        obj_size = blob_obj.size
        try:
            # Fixed: a caller-supplied start/end range must not be silently
            # dropped, so the sliced path only applies to full-object reads.
            use_sliced = (
                not sequential_read
                and start is None
                and end is None
                and self._check_if_sliced_download_is_eligible(obj_size, checksum)
            )
            if use_sliced:
                print("Sliced Download Preferred, and Starting...")
                # Part boundaries: parts 0..N-2 each span obj_size // N bytes;
                # the final part absorbs the remainder. chunks_offset is
                # [0, p, 2p, ..., (N-1)p, obj_size].
                part_size = obj_size // _SLICED_DOWNLOAD_PARTS
                chunks_offset = [
                    i * part_size for i in range(_SLICED_DOWNLOAD_PARTS)
                ] + [obj_size]

                # Truncate the file to zero while keeping it in place so each
                # slice can seek-and-write into it.
                with open(filename, "wb"):
                    pass

                tasks, file_handles = [], []
                try:
                    for idx in range(_SLICED_DOWNLOAD_PARTS):
                        file_handle = await aiofiles.open(filename, "r+b")
                        await file_handle.seek(chunks_offset[idx])
                        tasks.append(
                            self._perform_download(
                                transport,
                                file_handle,
                                download_url,
                                headers,
                                chunks_offset[idx],
                                chunks_offset[idx + 1] - 1,  # inclusive end
                                timeout=timeout,
                                checksum=checksum,
                                retry=retry,
                                sequential_read=sequential_read,
                            )
                        )
                        file_handles.append(file_handle)
                    await asyncio.gather(*tasks)
                finally:
                    for file_handle in file_handles:
                        await file_handle.close()
            else:
                print("Sequential Download Preferred, and Starting...")
                async with aiofiles.open(filename, "wb") as file_obj:
                    await self._perform_download(
                        transport,
                        file_obj,
                        download_url,
                        headers,
                        start,
                        end,
                        timeout=timeout,
                        checksum=checksum,
                        retry=retry,
                        sequential_read=sequential_read,
                    )
        except (blob.DataCorruption, blob.NotFound):
            # Don't leave a partially written file behind.
            await aiofiles.os.remove(filename)
            raise
        except blob.InvalidResponse as exc:
            blob._raise_from_invalid_response(exc)
0 commit comments