From 84156559b8a1fa06ead8169954c769ab6e428642 Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Thu, 23 Jul 2015 10:59:45 -0400
Subject: [PATCH 1/5] Add usage docs for jobs: query and load.

---
 docs/bigquery-usage.rst | 186 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index 87b948f7eba9..86170b91efea 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -236,3 +236,189 @@ Delete a table:
     >>> dataset = client.dataset('dataset_name')
     >>> table = dataset.table(name='person_ages')
     >>> table.delete()  # API request
+
+Jobs
+----
+
+Jobs describe actions performed on data in BigQuery tables:
+
+- Load data into a table
+- Run a query against data in one or more tables
+- Extrat data from a table
+- Copy a table
+
+List jobs for a project:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> jobs = client.jobs()  # API request
+    >>> [(job.job_id, job.type, job.created, job.state) for job in jobs]
+    [('e3344fba-09df-4ae0-8337-fddee34b3840', 'insert', datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>), 'done')]
+
+Querying data (synchronous)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Run a query which can be expected to complete within bounded time:
+
+.. doctest::
+
+    >>> import time
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> query = """\
+        SELECT count(*) AS age_count FROM dataset_name.person_ages
+        """
+    >>> results = client.query(query, timeout_ms=1000)
+    >>> while not results.job_complete:
+    ...     time.sleep(10)
+    ...     results.reload()  # API request
+    >>> results.schema
+    [{'name': 'age_count', 'type': 'integer', 'mode': 'nullable'}]
+    >>> results.rows
+    [(15,)]
+
+.. note::
+
+   If the query takse longer than the timeout allowd, ``results.job_complete``
+   will be False: we therefore poll until it is completed.
+
+Querying data (asynchronous)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Background a query, loading the results into a table:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> query = """\
+        SELECT first_name + ' ' + last_name AS full_name,
+               FLOOR(DATEDIFF(CURRENT_DATE(), birth_date) / 365) AS age
+          FROM dataset_name.persons
+        """
+    >>> dataset = client.dataset('dataset_name')
+    >>> table = dataset.table(name='person_ages')
+    >>> job = client.query_async(query,
+    ...                          destination=table,
+    ...                          write_disposition='truncate')
+    >>> job.job_id
+    'e3344fba-09df-4ae0-8337-fddee34b3840'
+    >>> job.type
+    'query'
+    >>> job.created
+    None
+    >>> job.state
+    None
+
+.. note::
+
+   - ``gcloud.bigquery`` generates a UUID for each job.
+   - The ``created`` and ``state`` fields are not set until the job
+     is submitted to the BigQuery back-end.
+
+Then, begin executing the job on the server:
+
+.. doctest::
+
+    >>> job.submit()  # API call
+    >>> job.created
+    datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>)
+    >>> job.state
+    'running'
+
+Poll until the job is complete:
+
+.. doctest::
+
+    >>> import time
+    >>> while job.state == 'running':
+    ...     time.sleep(10)
+    ...     job.reload()  # API call
+    >>> job.state
+    'done'
+    >>> job.ended
+    datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=<UTC>)
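+
+The submit-and-poll pattern above can be factored into a small helper.  The
+following is only an illustrative sketch (such a helper is not part of the
+proposed ``gcloud.bigquery`` API); it relies solely on the ``submit()``,
+``reload()``, ``state``, and ``job_id`` behavior shown in these examples:
+
+.. code-block:: python
+
+    import time
+
+    def wait_for_job(job, max_polls=100, poll_interval=10):
+        """Submit ``job``, then poll until it leaves the 'running' state."""
+        job.submit()  # API call
+        for _ in range(max_polls):
+            if job.state != 'running':
+                return job.state  # 'done' on success
+            time.sleep(poll_interval)
+            job.reload()  # API call: refresh job status from the back-end
+        raise RuntimeError('Job still running: %s' % job.job_id)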
+
+Inserting data (synchronous)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start a job loading data asynchronously from a local CSV files.
+into a new table.  First, create the job locally:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> table = dataset.table(name='person_ages')
+    >>> with open('/path/to/person_ages.csv') as f:
+    ...     job = table.load_from_file(f,
+    ...                                source_format='CSV',
+    ...                                skip_leading_rows=1
+    ...                                write_disposition='truncate',
+    ...                                )  # API request
+    >>> job.job_id
+    'e3344fba-09df-4ae0-8337-fddee34b3840'
+    >>> job.type
+    'load'
+    >>> job.created
+    datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>)
+    >>> job.state
+    'done'
+    >>> job.ended
+    datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=<UTC>)
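+
+Here ``skip_leading_rows=1`` tells the loader to ignore the header row of the
+CSV file.  As a purely illustrative sketch (the file contents below are
+invented for the example, and the error handling is not part of the proposed
+API), the same call can be combined with a check on the finished job's state:
+
+.. code-block:: python
+
+    # /path/to/person_ages.csv -- the first row is a header, which is
+    # why skip_leading_rows=1 is passed to load_from_file():
+    #
+    #     full_name,age
+    #     Alice Aardvark,27
+    #     Bob Badger,33
+
+    with open('/path/to/person_ages.csv', 'rb') as file_obj:
+        job = table.load_from_file(file_obj,
+                                   source_format='CSV',
+                                   skip_leading_rows=1,
+                                   write_disposition='truncate')  # API request
+
+    if job.state != 'done':  # the load runs synchronously
+        raise RuntimeError('Load failed: %s' % job.job_id)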
+
+Inserting data (asynchronous)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start a job loading data asynchronously from a set of CSV files, located on
+Google Cloud Storage, appending rows into an existing table.  First, create
+the job locally:
+
+.. doctest::
+
+    >>> from gcloud import bigquery
+    >>> client = bigquery.Client()
+    >>> table = dataset.table(name='person_ages')
+    >>> job = table.load_from_storage(bucket_name='bucket-name',
+    ...                               object_name='object-prefix*',
+    ...                               source_format='CSV',
+    ...                               skip_leading_rows=1
+    ...                               write_disposition='append')
+    >>> job.job_id
+    'e3344fba-09df-4ae0-8337-fddee34b3840'
+    >>> job.type
+    'load'
+    >>> job.created
+    None
+    >>> job.state
+    None
+
+.. note::
+
+   - ``gcloud.bigquery`` generates a UUID for each job.
+   - The ``created`` and ``state`` fields are not set until the job
+     is submitted to the BigQuery back-end.
+
+Then, begin executing the job on the server:
+
+.. doctest::
+
+    >>> job.submit()  # API call
+    >>> job.created
+    datetime.datetime(2015, 7, 23, 9, 30, 20, 268260, tzinfo=<UTC>)
+    >>> job.state
+    'running'
+
+Poll until the job is complete:
+
+.. doctest::
+
+    >>> import time
+    >>> while job.state == 'running':
+    ...     time.sleep(10)
+    ...     job.reload()  # API call
+    >>> job.state
+    'done'
+    >>> job.ended
+    datetime.datetime(2015, 7, 23, 9, 30, 21, 334792, tzinfo=<UTC>)

From 02f4a921902f7591f403f06a79e8c972c64567fd Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Thu, 23 Jul 2015 13:44:01 -0400
Subject: [PATCH 2/5] Typo fixes.

Addresses feedback from @dhermes on #1014.
---
 docs/bigquery-usage.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index 86170b91efea..030fb076bc8f 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -244,7 +244,7 @@ Jobs describe actions performed on data in BigQuery tables:
 
 - Load data into a table
 - Run a query against data in one or more tables
-- Extrat data from a table
+- Extract data from a table
 - Copy a table
 
 List jobs for a project:
@@ -280,8 +280,8 @@ Run a query which can be expected to complete within bounded time:
 
 .. note::
 
-   If the query takse longer than the timeout allowd, ``results.job_complete``
-   will be False: we therefore poll until it is completed.
+   If the query takes longer than the timeout allowed, ``results.job_complete``
+   will be ``False``: we therefore poll until it is completed.
 
 Querying data (asynchronous)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -343,8 +343,8 @@ Poll until the job is complete:
 Inserting data (synchronous)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Start a job loading data asynchronously from a local CSV files.
-into a new table.  First, create the job locally:
+Load data synchronously from a local CSV file into a new table.  First,
+create the job locally:
 
 .. doctest::
 
@@ -383,7 +383,7 @@ locally:
     >>> job = table.load_from_storage(bucket_name='bucket-name',
     ...                               object_name='object-prefix*',
     ...                               source_format='CSV',
-    ...                               skip_leading_rows=1
+    ...                               skip_leading_rows=1,
     ...                               write_disposition='append')
     >>> job.job_id
     'e3344fba-09df-4ae0-8337-fddee34b3840'

From 31f083c35acb5edd1dd080fa2191a5d5882d5a0c Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Thu, 23 Jul 2015 13:49:25 -0400
Subject: [PATCH 3/5] Bound polling loops.

Addresses feedback from @dhermes in #1014.
---
 docs/bigquery-usage.rst | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index 030fb076bc8f..cd7d42b38338 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -270,9 +270,11 @@ Run a query which can be expected to complete within bounded time:
         SELECT count(*) AS age_count FROM dataset_name.person_ages
         """
     >>> results = client.query(query, timeout_ms=1000)
-    >>> while not results.job_complete:
-    ...     time.sleep(10)
-    ...     results.reload()  # API request
+    >>> retry_count = 100
+    >>> while retry_count > 0 and not results.job_complete:
+    ...     retry_count -= 1
+    ...     time.sleep(10)
+    ...     results.reload()  # API request
     >>> results.schema
     [{'name': 'age_count', 'type': 'integer', 'mode': 'nullable'}]
     >>> results.rows
@@ -332,7 +334,9 @@ Poll until the job is complete:
 .. doctest::
 
     >>> import time
-    >>> while job.state == 'running':
+    >>> retry_count = 100
+    >>> while retry_count > 0 and job.state == 'running':
+    ...     retry_count -= 1
     ...     time.sleep(10)
     ...     job.reload()  # API call
     >>> job.state
@@ -415,7 +419,9 @@ Poll until the job is complete:
 .. doctest::
 
     >>> import time
-    >>> while job.state == 'running':
+    >>> retry_count = 100
+    >>> while retry_count > 0 and job.state == 'running':
+    ...     retry_count -= 1
     ...     time.sleep(10)
     ...     job.reload()  # API call
     >>> job.state

From 9f07efd43bab2c707b7bfeddc7acd43f76a3492e Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Thu, 23 Jul 2015 13:54:47 -0400
Subject: [PATCH 4/5] Avoid 'f' as variable name.  pass mode to 'open'.

Addresses feedback from @dhermes in #1014.
---
 docs/bigquery-usage.rst | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index cd7d42b38338..1edf28e86d5f 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -355,12 +355,13 @@ create the job locally:
     >>> from gcloud import bigquery
     >>> client = bigquery.Client()
     >>> table = dataset.table(name='person_ages')
-    >>> with open('/path/to/person_ages.csv') as f:
-    ...     job = table.load_from_file(f,
-    ...                                source_format='CSV',
-    ...                                skip_leading_rows=1
-    ...                                write_disposition='truncate',
-    ...                                )  # API request
+    >>> with open('/path/to/person_ages.csv', 'rb') as file_obj:
+    ...     job = table.load_from_file(
+    ...         file_obj,
+    ...         source_format='CSV',
+    ...         skip_leading_rows=1,
+    ...         write_disposition='truncate',
+    ...         )  # API request
     >>> job.job_id
     'e3344fba-09df-4ae0-8337-fddee34b3840'
     >>> job.type

From 48eef9253d3c57f8e1cdb88dcbcb9c5a2a5ae5c4 Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Fri, 24 Jul 2015 10:49:20 -0400
Subject: [PATCH 5/5] Give argument a more descriptive name.

Addresses feedback from @dhermes on #1014.
---
 docs/bigquery-usage.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst
index 1edf28e86d5f..9bf03710f8ea 100644
--- a/docs/bigquery-usage.rst
+++ b/docs/bigquery-usage.rst
@@ -386,7 +386,7 @@ locally:
     >>> client = bigquery.Client()
     >>> table = dataset.table(name='person_ages')
    >>> job = table.load_from_storage(bucket_name='bucket-name',
-    ...                               object_name='object-prefix*',
+    ...                               object_name_glob='object-prefix*',
     ...                               source_format='CSV',
     ...                               skip_leading_rows=1,
     ...                               write_disposition='append')