diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 382f276b..2bc13ed7 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -337,10 +337,6 @@ def get_user_account_credentials(self):
         This method authenticates using user credentials, either loading saved
         credentials from a file or by going through the OAuth flow.
 
-        Parameters
-        ----------
-        None
-
         Returns
         -------
         GoogleCredentials : credentials
@@ -567,7 +563,7 @@ def load_data(
         try:
             for remaining_rows in _load.load_chunks(
                     self.client, dataframe, dataset_id, table_id,
-                    chunksize=chunksize):
+                    chunksize=chunksize, schema=schema):
                 self._print("\rLoad is {0}% Complete".format(
                     ((total_rows - remaining_rows) * 100) / total_rows))
         except self.http_error as ex:
@@ -699,8 +695,9 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
         table = _Table(self.project_id, dataset_id,
                        private_key=self.private_key)
         table.delete(table_id)
-        table.create(table_id, table_schema)
-        sleep(delay)
+        if not table.is_date_partitioned(table_id):
+            table.create(table_id, table_schema)
+            sleep(delay)
 
 
 def _get_credentials_file():
@@ -1007,6 +1004,11 @@ def _generate_bq_schema(df, default_type='STRING'):
 
 
 class _Table(GbqConnector):
+    partition_decorator = '$'
+
+    def is_date_partitioned(self, table_id):
+        return self.partition_decorator in table_id
+
     def __init__(self, project_id, dataset_id, reauth=False, verbose=False,
                  private_key=None):
         self.dataset_id = dataset_id
@@ -1017,7 +1019,7 @@ def exists(self, table_id):
 
         Parameters
         ----------
-        table : str
+        table_id : str
             Name of table to be verified
 
         Returns
@@ -1028,24 +1030,30 @@ def exists(self, table_id):
         from google.api_core.exceptions import NotFound
 
         table_ref = self.client.dataset(self.dataset_id).table(table_id)
+
         try:
-            self.client.get_table(table_ref)
+            table = self.client.get_table(table_ref)
+            if self.is_date_partitioned(table_id):
+                return table.num_rows > 0
+            return True
         except NotFound:
             return False
         except self.http_error as ex:
             self.process_http_error(ex)
 
-    def create(self, table_id, schema):
+    def create(self, table_id, schema, date_partitioned=False):
         """ Create a table in Google BigQuery given a table and schema
 
         Parameters
         ----------
-        table : str
+        table_id : str
             Name of table to be written
         schema : str
             Use the generate_bq_schema to generate your table schema
             from a dataframe.
+        date_partitioned: boolean
+            Whether table is to be created as a date partitioned table.
         """
         from google.cloud.bigquery import SchemaField
         from google.cloud.bigquery import Table
@@ -1062,6 +1070,9 @@ def create(self, table_id, schema):
         table_ref = self.client.dataset(self.dataset_id).table(table_id)
         table = Table(table_ref)
 
+        if date_partitioned or self.is_date_partitioned(table_id):
+            table.partitioning_type = 'DAY'
+
         # Manually create the schema objects, adding NULLABLE mode
         # as a workaround for
         # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
@@ -1084,7 +1095,7 @@ def delete(self, table_id):
 
         Parameters
         ----------
-        table : str
+        table_id : str
             Name of table to be deleted
         """
         from google.api_core.exceptions import NotFound
@@ -1163,7 +1174,7 @@ def create(self, dataset_id):
 
         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to be written
         """
         from google.cloud.bigquery import Dataset
@@ -1184,7 +1195,7 @@ def delete(self, dataset_id):
 
         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to be deleted
         """
         from google.api_core.exceptions import NotFound
@@ -1207,7 +1218,7 @@ def tables(self, dataset_id):
 
         Parameters
         ----------
-        dataset : str
+        dataset_id : str
             Name of dataset to list tables for
 
         Returns