From 71455035500d73797229575244fd200fbd36bbf5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 15 Oct 2013 23:48:15 +0200 Subject: [PATCH] DOC: fix building of gbq docs --- doc/source/v0.13.0.txt | 38 ++++++++++++++--------------- pandas/core/frame.py | 28 +++++++++++----------- pandas/io/gbq.py | 54 ++++++++++++++++++++++-------------------- 3 files changed, 61 insertions(+), 59 deletions(-) diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 14e120fdff672..b2c78f38140b4 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -651,7 +651,7 @@ Experimental - ``pandas.io.gbq`` provides a simple way to extract from, and load data into, Google's BigQuery Data Sets by way of pandas DataFrames. BigQuery is a high performance SQL-like database service, useful for performing ad-hoc queries - against extremely large datasets. :ref:`See the docs` + against extremely large datasets. :ref:`See the docs ` .. code-block:: python @@ -684,24 +684,24 @@ Experimental df3 = pandas.concat([df2.min(), df2.mean(), df2.max()], axis=1,keys=["Min Tem", "Mean Temp", "Max Temp"]) - The resulting dataframe is: - - ``` - Min Tem Mean Temp Max Temp - MONTH - 1 -53.336667 39.827892 89.770968 - 2 -49.837500 43.685219 93.437932 - 3 -77.926087 48.708355 96.099998 - 4 -82.892858 55.070087 97.317240 - 5 -92.378261 61.428117 102.042856 - 6 -77.703334 65.858888 102.900000 - 7 -87.821428 68.169663 106.510714 - 8 -89.431999 68.614215 105.500000 - 9 -86.611112 63.436935 107.142856 - 10 -78.209677 56.880838 92.103333 - 11 -50.125000 48.861228 94.996428 - 12 -50.332258 42.286879 94.396774 - ``` + The resulting dataframe is:: + + > df3 + Min Tem Mean Temp Max Temp + MONTH + 1 -53.336667 39.827892 89.770968 + 2 -49.837500 43.685219 93.437932 + 3 -77.926087 48.708355 96.099998 + 4 -82.892858 55.070087 97.317240 + 5 -92.378261 61.428117 102.042856 + 6 -77.703334 65.858888 102.900000 + 7 -87.821428 68.169663 106.510714 + 8 -89.431999 68.614215 105.500000 + 9 -86.611112 
63.436935 107.142856 + 10 -78.209677 56.880838 92.103333 + 11 -50.125000 48.861228 94.996428 + 12 -50.332258 42.286879 94.396774 + .. warning:: To use this module, you will need a BigQuery account. See diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 504d49ddca13a..bfc086b09730e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -672,34 +672,34 @@ def to_dict(self, outtype='dict'): raise ValueError("outtype %s not understood" % outtype) def to_gbq(self, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs): - """ - Write a DataFrame to a Google BigQuery table. If the table exists, - the DataFrame will be appended. If not, a new table will be created, - in which case the schema will have to be specified. By default, + """Write a DataFrame to a Google BigQuery table. + + If the table exists, the DataFrame will be appended. If not, a new table + will be created, in which case the schema will have to be specified. By default, rows will be written in the order they appear in the DataFrame, though the user may specify an alternative order. Parameters --------------- - destination_table: string + destination_table : string name of table to be written, in the form 'dataset.tablename' schema : sequence (optional) list of column types in order for data to be inserted, e.g. ['INTEGER', 'TIMESTAMP', 'BOOLEAN'] - col_order: sequence (optional) + col_order : sequence (optional) order which columns are to be inserted, e.g. ['primary_key', 'birthday', 'username'] - if_exists: {'fail', 'replace', 'append'} (optional) - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. + if_exists : {'fail', 'replace', 'append'} (optional) + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. 
kwargs are passed to the Client constructor - Raises: + Raises ------ - SchemaMissing: + SchemaMissing : Raised if the 'if_exists' parameter is set to 'replace', but no schema is specified - TableExists: + TableExists : Raised if the specified 'destination_table' exists but the 'if_exists' parameter is set to 'fail' (the default) - InvalidSchema: + InvalidSchema : Raised if the 'schema' parameter does not match the provided DataFrame """ diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index f226af6629aa5..931aa732d5286 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -316,36 +316,36 @@ def _parse_data(client, job, index_col=None, col_order=None): return final_df def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs): - """ - Write a DataFrame to a Google BigQuery table. If the table exists, - the DataFrame will be appended. If not, a new table will be created, - in which case the schema will have to be specified. By default, + """Write a DataFrame to a Google BigQuery table. + + If the table exists, the DataFrame will be appended. If not, a new table + will be created, in which case the schema will have to be specified. By default, rows will be written in the order they appear in the DataFrame, though the user may specify an alternative order. Parameters - --------------- - dataframe: DataFrame + ---------- + dataframe : DataFrame DataFrame to be written - destination_table: string + destination_table : string name of table to be written, in the form 'dataset.tablename' schema : sequence (optional) list of column types in order for data to be inserted, e.g. ['INTEGER', 'TIMESTAMP', 'BOOLEAN'] - col_order: sequence (optional) + col_order : sequence (optional) order which columns are to be inserted, e.g. ['primary_key', 'birthday', 'username'] - if_exists: {'fail', 'replace', 'append'} (optional) - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. 
- append: If table exists, insert data. Create if does not exist. + if_exists : {'fail', 'replace', 'append'} (optional) + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. kwargs are passed to the Client constructor - Raises: + Raises ------ - SchemaMissing: + SchemaMissing : Raised if the 'if_exists' parameter is set to 'replace', but no schema is specified - TableExists: + TableExists : Raised if the specified 'destination_table' exists but the 'if_exists' parameter is set to 'fail' (the default) - InvalidSchema: + InvalidSchema : Raised if the 'schema' parameter does not match the provided DataFrame """ @@ -416,35 +416,37 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists= job = client.Load(table_reference, csv_file.name, schema=schema, **opts) def read_gbq(query, project_id = None, destination_table = None, index_col=None, col_order=None, **kwargs): - """ + """Load data from Google BigQuery. + The main method a user calls to load data from Google BigQuery into a pandas DataFrame. This is a simple wrapper for Google's bq.py and bigquery_client.py, which we use to get the source data. Because of this, this script respects the user's bq settings file, '~/.bigqueryrc', if it exists. Such a file can be generated using 'bq init'. Further, - additional parameters for the query can be specified as either **kwds in the command, + additional parameters for the query can be specified as either ``**kwds`` in the command, or using FLAGS provided in the 'gflags' module. Particular options can be found in bigquery_client.py. Parameters ---------- - query: str + query : str SQL-Like Query to return data values - project_id: str (optional) + project_id : str (optional) Google BigQuery Account project ID. 
Optional, since it may be located in ~/.bigqueryrc - index_col: str (optional) + index_col : str (optional) Name of result column to use for index in results DataFrame - col_order: list(str) (optional) + col_order : list(str) (optional) List of BigQuery column names in the desired order for results DataFrame - destination_table: string (optional) + destination_table : string (optional) If provided, send the results to the given table. - **kwargs: to be passed to bq.Client.Create(). Particularly: 'trace', 'sync', - 'api', 'api_version' + **kwargs : + To be passed to bq.Client.Create(). Particularly: 'trace', + 'sync', 'api', 'api_version' Returns ------- - df: pandas DataFrame + df : DataFrame DataFrame representing results of query """