From 4158bcbdd85ce8bcb9eafe8277127e325553e358 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Tue, 29 Sep 2020 10:51:14 +0530 Subject: [PATCH 1/2] feat: add size parameter for load from dataframe and json --- google/cloud/bigquery/client.py | 4 +++- tests/unit/test_client.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d2aa45999..505d41e6c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2258,11 +2258,13 @@ def load_table_from_dataframe( dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: + file_size = os.path.getsize(tmppath) return self.load_table_from_file( parquet_file, destination, num_retries=num_retries, rewind=True, + size=file_size, job_id=job_id, job_id_prefix=job_id_prefix, location=location, @@ -2365,10 +2367,10 @@ def load_table_from_json( data_str = u"\n".join(json.dumps(item) for item in json_rows) data_file = io.BytesIO(data_str.encode()) - return self.load_table_from_file( data_file, destination, + size=len(data_str), num_retries=num_retries, job_id=job_id, job_id_prefix=job_id_prefix, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c4c604ed0..e45afe47a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7486,6 +7486,7 @@ def test_load_table_from_dataframe(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7529,6 +7530,7 @@ def test_load_table_from_dataframe_w_client_location(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7581,6 +7583,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, 
job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7635,6 +7638,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7727,6 +7731,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7786,6 +7791,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7831,6 +7837,7 @@ def test_load_table_from_dataframe_unknown_table(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7945,6 +7952,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7991,6 +7999,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8065,6 +8074,7 @@ def test_load_table_from_dataframe_struct_fields(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8138,6 +8148,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8232,6 +8243,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): self.TABLE_REF, 
num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8288,6 +8300,7 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8403,6 +8416,7 @@ def test_load_table_from_dataframe_w_nulls(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8454,6 +8468,7 @@ def test_load_table_from_json_basic_use(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, @@ -8505,6 +8520,7 @@ def test_load_table_from_json_non_default_args(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, From 321d97768a960ed1ec80631f2689e90473196c80 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Wed, 30 Sep 2020 15:37:25 +0530 Subject: [PATCH 2/2] perf: calculate length of encoded string --- google/cloud/bigquery/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 505d41e6c..82712fccc 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2366,11 +2366,12 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) data_str = u"\n".join(json.dumps(item) for item in json_rows) - data_file = io.BytesIO(data_str.encode()) + encoded_str = data_str.encode() + data_file = io.BytesIO(encoded_str) return self.load_table_from_file( data_file, destination, - size=len(data_str), + size=len(encoded_str), num_retries=num_retries, job_id=job_id, job_id_prefix=job_id_prefix,