 import typing
 from typing import (
     Any,
-    BinaryIO,
     Dict,
+    IO,
     Iterable,
+    Mapping,
     List,
     Optional,
     Sequence,
 pyarrow = _helpers.PYARROW_VERSIONS.try_import()

 TimeoutType = Union[float, None]
+ResumableTimeoutType = Union[
+    None, float, Tuple[float, float]
+]  # for resumable media methods

 if typing.TYPE_CHECKING:  # pragma: NO COVER
     # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
     PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
+    import pandas  # type: ignore
+    import requests  # required by api-core

 _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
 _MAX_MULTIPART_SIZE = 5 * 1024 * 1024
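The new alias mirrors the timeout forms accepted by ``requests``. As a rough illustration (not part of the diff), both of the following values satisfy ``ResumableTimeoutType``::

    timeout: ResumableTimeoutType = 300.0   # a single per-request deadline, in seconds
    timeout = (10.0, 300.0)                 # a (connect_timeout, read_timeout) tuple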
@@ -2348,7 +2354,7 @@ def load_table_from_uri(

     def load_table_from_file(
         self,
-        file_obj: BinaryIO,
+        file_obj: IO[bytes],
         destination: Union[Table, TableReference, TableListItem, str],
         rewind: bool = False,
         size: int = None,
@@ -2358,50 +2364,50 @@ def load_table_from_file(
         location: str = None,
         project: str = None,
         job_config: LoadJobConfig = None,
-        timeout: TimeoutType = DEFAULT_TIMEOUT,
+        timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
     ) -> job.LoadJob:
         """Upload the contents of this table from a file-like object.

         Similar to :meth:`load_table_from_uri`, this method creates, starts and
         returns a :class:`~google.cloud.bigquery.job.LoadJob`.

         Args:
-            file_obj (file): A file handle opened in binary mode for reading.
-            destination (Union[ \
-                google.cloud.bigquery.table.Table, \
-                google.cloud.bigquery.table.TableReference, \
-                google.cloud.bigquery.table.TableListItem, \
-                str, \
-            ]):
+            file_obj:
+                A file handle opened in binary mode for reading.
+            destination:
                 Table into which data is to be loaded. If a string is passed
                 in, this method attempts to create a table reference from a
                 string using
                 :func:`google.cloud.bigquery.table.TableReference.from_string`.

         Keyword Arguments:
-            rewind (Optional[bool]):
+            rewind:
                 If True, seek to the beginning of the file handle before
                 reading the file.
-            size (Optional[int]):
+            size:
                 The number of bytes to read from the file handle. If size is
                 ``None`` or large, resumable upload will be used. Otherwise,
                 multipart upload will be used.
-            num_retries (Optional[int]): Number of upload retries. Defaults to 6.
-            job_id (Optional[str]): Name of the job.
-            job_id_prefix (Optional[str]):
+            num_retries: Number of upload retries. Defaults to 6.
+            job_id: Name of the job.
+            job_id_prefix:
                 The user-provided prefix for a randomly generated job ID.
                 This parameter will be ignored if a ``job_id`` is also given.
-            location (Optional[str]):
+            location:
                 Location where to run the job. Must match the location of the
                 destination table.
-            project (Optional[str]):
+            project:
                 Project ID of the project of where to run the job. Defaults
                 to the client's project.
-            job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
+            job_config:
                 Extra configuration options for the job.
-            timeout (Optional[float]):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request
+                may be repeated several times using the same timeout each time.
+
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.

         Returns:
             google.cloud.bigquery.job.LoadJob: A new load job.
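With the widened annotation, callers can pass either a single float or a ``(connect_timeout, read_timeout)`` tuple to ``load_table_from_file``. A minimal usage sketch (file and table names are hypothetical placeholders)::

    from google.cloud import bigquery

    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        autodetect=True,
    )

    with open("data.csv", "rb") as file_obj:  # any file handle opened in binary mode
        load_job = client.load_table_from_file(
            file_obj,
            "my-project.my_dataset.my_table",  # placeholder destination
            job_config=job_config,
            timeout=(10.0, 300.0),  # connect_timeout, read_timeout
        )
    load_job.result()  # block until the load job finishes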
@@ -2453,7 +2459,7 @@ def load_table_from_file(

     def load_table_from_dataframe(
         self,
-        dataframe,
+        dataframe: "pandas.DataFrame",
         destination: Union[Table, TableReference, str],
         num_retries: int = _DEFAULT_NUM_RETRIES,
         job_id: str = None,
@@ -2462,7 +2468,7 @@ def load_table_from_dataframe(
         project: str = None,
         job_config: LoadJobConfig = None,
         parquet_compression: str = "snappy",
-        timeout: TimeoutType = DEFAULT_TIMEOUT,
+        timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
     ) -> job.LoadJob:
         """Upload the contents of a table from a pandas DataFrame.

@@ -2481,9 +2487,9 @@ def load_table_from_dataframe(
             https://github.com/googleapis/python-bigquery/issues/19

         Args:
-            dataframe (pandas.DataFrame):
+            dataframe:
                 A :class:`~pandas.DataFrame` containing the data to load.
-            destination (google.cloud.bigquery.table.TableReference):
+            destination:
                 The destination table to use for loading the data. If it is an
                 existing table, the schema of the :class:`~pandas.DataFrame`
                 must match the schema of the destination table. If the table
@@ -2495,19 +2501,19 @@ def load_table_from_dataframe(
                 :func:`google.cloud.bigquery.table.TableReference.from_string`.

         Keyword Arguments:
-            num_retries (Optional[int]): Number of upload retries.
-            job_id (Optional[str]): Name of the job.
-            job_id_prefix (Optional[str]):
+            num_retries: Number of upload retries.
+            job_id: Name of the job.
+            job_id_prefix:
                 The user-provided prefix for a randomly generated
                 job ID. This parameter will be ignored if a ``job_id`` is
                 also given.
-            location (Optional[str]):
+            location:
                 Location where to run the job. Must match the location of the
                 destination table.
-            project (Optional[str]):
+            project:
                 Project ID of the project of where to run the job. Defaults
                 to the client's project.
-            job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
+            job_config:
                 Extra configuration options for the job.

                 To override the default pandas data type conversions, supply
@@ -2524,7 +2530,7 @@ def load_table_from_dataframe(
                 :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and
                 :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
                 supported.
-            parquet_compression (Optional[str]):
+            parquet_compression:
                 [Beta] The compression method to use if intermittently
                 serializing ``dataframe`` to a parquet file.

@@ -2537,9 +2543,13 @@ def load_table_from_dataframe(
                 passed as the ``compression`` argument to the underlying
                 ``DataFrame.to_parquet()`` method.
                 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
-            timeout (Optional[float]):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request may
+                be repeated several times using the same timeout each time.
+
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.

         Returns:
             google.cloud.bigquery.job.LoadJob: A new load job.
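The same two timeout forms apply to the DataFrame path. A short sketch, assuming pandas and pyarrow are installed and using a placeholder table name::

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()
    dataframe = pandas.DataFrame({"name": ["alpha", "beta"], "value": [1, 2]})

    load_job = client.load_table_from_dataframe(
        dataframe,
        "my-project.my_dataset.my_table",  # placeholder destination
        timeout=(10.0, 600.0),  # tuple form: separate connect and read timeouts
    )
    load_job.result()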
@@ -2717,7 +2727,7 @@ def load_table_from_json(
         location: str = None,
         project: str = None,
         job_config: LoadJobConfig = None,
-        timeout: TimeoutType = DEFAULT_TIMEOUT,
+        timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
     ) -> job.LoadJob:
         """Upload the contents of a table from a JSON string or dict.

@@ -2741,36 +2751,35 @@ def load_table_from_json(
                         client = bigquery.Client()
                         client.load_table_from_file(data_as_file, ...)

-            destination (Union[ \
-                google.cloud.bigquery.table.Table, \
-                google.cloud.bigquery.table.TableReference, \
-                google.cloud.bigquery.table.TableListItem, \
-                str, \
-            ]):
+            destination:
                 Table into which data is to be loaded. If a string is passed
                 in, this method attempts to create a table reference from a
                 string using
                 :func:`google.cloud.bigquery.table.TableReference.from_string`.

         Keyword Arguments:
-            num_retries (Optional[int]): Number of upload retries.
-            job_id (Optional[str]): Name of the job.
-            job_id_prefix (Optional[str]):
+            num_retries: Number of upload retries.
+            job_id: Name of the job.
+            job_id_prefix:
                 The user-provided prefix for a randomly generated job ID.
                 This parameter will be ignored if a ``job_id`` is also given.
-            location (Optional[str]):
+            location:
                 Location where to run the job. Must match the location of the
                 destination table.
-            project (Optional[str]):
+            project:
                 Project ID of the project of where to run the job. Defaults
                 to the client's project.
-            job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
+            job_config:
                 Extra configuration options for the job. The ``source_format``
                 setting is always set to
                 :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`.
-            timeout (Optional[float]):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request may
+                be repeated several times using the same timeout each time.
+
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.

         Returns:
             google.cloud.bigquery.job.LoadJob: A new load job.
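``load_table_from_json`` accepts the same timeout values, and a plain float remains valid. A hedged sketch with placeholder rows and table name::

    from google.cloud import bigquery

    client = bigquery.Client()
    json_rows = [
        {"name": "alpha", "value": 1},
        {"name": "beta", "value": 2},
    ]

    load_job = client.load_table_from_json(
        json_rows,
        "my-project.my_dataset.my_table",  # placeholder destination
        timeout=600.0,  # a single float still works as before
    )
    load_job.result()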
@@ -2819,60 +2828,77 @@ def load_table_from_json(
         )

     def _do_resumable_upload(
-        self, stream, metadata, num_retries, timeout, project=None
-    ):
+        self,
+        stream: IO[bytes],
+        metadata: Mapping[str, str],
+        num_retries: int,
+        timeout: Optional[ResumableTimeoutType],
+        project: Optional[str] = None,
+    ) -> "requests.Response":
         """Perform a resumable upload.

         Args:
-            stream (IO[bytes]): A bytes IO object open for reading.
+            stream: A bytes IO object open for reading.

-            metadata (Dict): The metadata associated with the upload.
+            metadata: The metadata associated with the upload.

-            num_retries (int):
+            num_retries:
                 Number of upload retries. (Deprecated: This
                 argument will be removed in a future release.)

-            timeout (float):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request may
+                be repeated several times using the same timeout each time.

-            project (Optional[str]):
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.
+
+            project:
                 Project ID of the project of where to run the upload. Defaults
                 to the client's project.

         Returns:
-            requests.Response:
-                The "200 OK" response object returned after the final chunk
-                is uploaded.
+            The "200 OK" response object returned after the final chunk
+            is uploaded.
         """
         upload, transport = self._initiate_resumable_upload(
             stream, metadata, num_retries, timeout, project=project
         )

         while not upload.finished:
-            response = upload.transmit_next_chunk(transport)
+            response = upload.transmit_next_chunk(transport, timeout=timeout)

         return response

     def _initiate_resumable_upload(
-        self, stream, metadata, num_retries, timeout, project=None
+        self,
+        stream: IO[bytes],
+        metadata: Mapping[str, str],
+        num_retries: int,
+        timeout: Optional[ResumableTimeoutType],
+        project: Optional[str] = None,
     ):
         """Initiate a resumable upload.

         Args:
-            stream (IO[bytes]): A bytes IO object open for reading.
+            stream: A bytes IO object open for reading.

-            metadata (Dict): The metadata associated with the upload.
+            metadata: The metadata associated with the upload.

-            num_retries (int):
+            num_retries:
                 Number of upload retries. (Deprecated: This
                 argument will be removed in a future release.)

-            timeout (float):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request may
+                be repeated several times using the same timeout each time.

-            project (Optional[str]):
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.
+
+            project:
                 Project ID of the project of where to run the upload. Defaults
                 to the client's project.

@@ -2921,29 +2947,39 @@ def _initiate_resumable_upload(
         return upload, transport

     def _do_multipart_upload(
-        self, stream, metadata, size, num_retries, timeout, project=None
+        self,
+        stream: IO[bytes],
+        metadata: Mapping[str, str],
+        size: int,
+        num_retries: int,
+        timeout: Optional[ResumableTimeoutType],
+        project: Optional[str] = None,
     ):
         """Perform a multipart upload.

         Args:
-            stream (IO[bytes]): A bytes IO object open for reading.
+            stream: A bytes IO object open for reading.

-            metadata (Dict): The metadata associated with the upload.
+            metadata: The metadata associated with the upload.

-            size (int):
+            size:
                 The number of bytes to be uploaded (which will be read
                 from ``stream``). If not provided, the upload will be
                 concluded once ``stream`` is exhausted (or :data:`None`).

-            num_retries (int):
+            num_retries:
                 Number of upload retries. (Deprecated: This
                 argument will be removed in a future release.)

-            timeout (float):
+            timeout:
                 The number of seconds to wait for the underlying HTTP transport
-                before using ``retry``.
+                before using ``retry``. Depending on the retry strategy, a request may
+                be repeated several times using the same timeout each time.

-            project (Optional[str]):
+                Can also be passed as a tuple (connect_timeout, read_timeout).
+                See :meth:`requests.Session.request` documentation for details.
+
+            project:
                 Project ID of the project of where to run the upload. Defaults
                 to the client's project.

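Taken together, the private helpers now thread the caller's timeout through to every request of an upload. Conceptually, the resumable loop above reduces to the following (a simplified restatement of the diff, not new behavior)::

    while not upload.finished:
        # each chunk request reuses the same per-request timeout (float or tuple)
        response = upload.transmit_next_chunk(transport, timeout=timeout)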