264 changes: 0 additions & 264 deletions bigquery/docs/snippets.py
@@ -122,45 +122,6 @@ def test_create_client_default_credentials():
    assert client is not None


def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    dataset_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]


def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
@@ -190,44 +151,6 @@ def test_create_table_cmek(client, to_delete):
    # [END bigquery_create_table_cmek]


def test_create_partitioned_table(client, to_delete):
    dataset_id = "create_table_partitioned_{}".format(_millis())
    dataset_ref = bigquery.Dataset(client.dataset(dataset_id))
    dataset = client.create_dataset(dataset_ref)
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')

    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,
    )  # 90 days

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000


@pytest.mark.skip(
    reason=(
        "update_table() is flaky "
@@ -369,48 +292,6 @@ def test_update_table_expiration(client, to_delete):
    # [END bigquery_update_table_expiration]


@pytest.mark.skip(
    reason=(
        "update_table() is flaky "
        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
    )
)
def test_relax_column(client, to_delete):
    """Updates a schema field from required to nullable."""
    dataset_id = "relax_column_dataset_{}".format(_millis())
    table_id = "relax_column_table_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_relax_column]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    original_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    table_ref = client.dataset(dataset_id).table(table_id)
    table = bigquery.Table(table_ref, schema=original_schema)
    table = client.create_table(table)
    assert all(field.mode == "REQUIRED" for field in table.schema)

    # SchemaField properties cannot be edited after initialization.
    # To make changes, construct new SchemaField objects.
    relaxed_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    table.schema = relaxed_schema
    table = client.update_table(table, ["schema"])

    assert all(field.mode == "NULLABLE" for field in table.schema)
    # [END bigquery_relax_column]


@pytest.mark.skip(
    reason=(
        "update_table() is flaky "
@@ -1007,151 +888,6 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):
    assert "Loaded 50 rows." in out


def test_load_table_add_column(client, to_delete):
    dataset_id = "load_table_add_column_{}".format(_millis())
    dataset_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = "US"
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    snippets_dir = os.path.abspath(os.path.dirname(__file__))
    filepath = os.path.join(
        snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv"
    )
    table_ref = dataset_ref.table("my_table")
    old_schema = [bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")]
    table = client.create_table(bigquery.Table(table_ref, schema=old_schema))

    # [START bigquery_add_column_load_append]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')
    # filepath = 'path/to/your_file.csv'

    # Retrieves the destination table and checks the length of the schema
    table_id = "my_table"
    table_ref = dataset_ref.table(table_id)
    table = client.get_table(table_ref)
    print("Table {} contains {} columns.".format(table_id, len(table.schema)))

    # Configures the load job to append the data to the destination table,
    # allowing field addition
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.schema_update_options = [
        bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
    ]
    # In this example, the existing table contains only the 'full_name' column.
    # 'REQUIRED' fields cannot be added to an existing schema, so the
    # additional column must be 'NULLABLE'.
    job_config.schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1

    with open(filepath, "rb") as source_file:
        job = client.load_table_from_file(
            source_file,
            table_ref,
            location="US",  # Must match the destination dataset location.
            job_config=job_config,
        )  # API request

    job.result()  # Waits for table load to complete.
    print(
        "Loaded {} rows into {}:{}.".format(
            job.output_rows, dataset_id, table_ref.table_id
        )
    )

    # Checks the updated length of the schema
    table = client.get_table(table)
    print("Table {} now contains {} columns.".format(table_id, len(table.schema)))
    # [END bigquery_add_column_load_append]
    assert len(table.schema) == 2
    assert table.num_rows > 0


def test_load_table_relax_column(client, to_delete):
    dataset_id = "load_table_relax_column_{}".format(_millis())
    dataset_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = "US"
    dataset = client.create_dataset(dataset)
    to_delete.append(dataset)

    snippets_dir = os.path.abspath(os.path.dirname(__file__))
    filepath = os.path.join(
        snippets_dir, "..", "..", "bigquery", "tests", "data", "people.csv"
    )
    table_ref = dataset_ref.table("my_table")
    old_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("favorite_color", "STRING", mode="REQUIRED"),
    ]
    table = client.create_table(bigquery.Table(table_ref, schema=old_schema))

    # [START bigquery_relax_column_load_append]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')
    # filepath = 'path/to/your_file.csv'

    # Retrieves the destination table and checks the number of required fields
    table_id = "my_table"
    table_ref = dataset_ref.table(table_id)
    table = client.get_table(table_ref)
    original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema)
    # In this example, the existing table has 3 required fields.
    print("{} fields in the schema are required.".format(original_required_fields))

    # Configures the load job to append the data to a destination table,
    # allowing field relaxation
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.schema_update_options = [
        bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION
    ]
    # In this example, the existing table contains three required fields
    # ('full_name', 'age', and 'favorite_color'), while the data to load
    # contains only the first two fields.
    job_config.schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1

    with open(filepath, "rb") as source_file:
        job = client.load_table_from_file(
            source_file,
            table_ref,
            location="US",  # Must match the destination dataset location.
            job_config=job_config,
        )  # API request

    job.result()  # Waits for table load to complete.
    print(
        "Loaded {} rows into {}:{}.".format(
            job.output_rows, dataset_id, table_ref.table_id
        )
    )

    # Checks the updated number of required fields
    table = client.get_table(table)
    current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema)
    print("{} fields in the schema are now required.".format(current_required_fields))
    # [END bigquery_relax_column_load_append]
    assert original_required_fields - current_required_fields == 1
    assert len(table.schema) == 3
    assert table.schema[2].mode == "NULLABLE"
    assert table.num_rows > 0


def test_extract_table(client, to_delete):
    bucket_name = "extract_shakespeare_{}".format(_millis())
    storage_client = storage.Client()
50 changes: 50 additions & 0 deletions bigquery/docs/usage/tables.rst
@@ -67,6 +67,24 @@ Create an integer range partitioned table with the
   :start-after: [START bigquery_create_table_range_partitioned]
   :end-before: [END bigquery_create_table_range_partitioned]
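
A minimal sketch of what ``create_table_range_partitioned.py`` presumably
contains (the ``zipcode`` column name and range bounds here are illustrative):

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_range_partitioned_table")

    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("zipcode", "INTEGER"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    # Partition rows by integer ranges over the "zipcode" column.
    table.range_partitioning = bigquery.RangePartitioning(
        field="zipcode",
        range_=bigquery.PartitionRange(start=0, end=100000, interval=10000),
    )
    table = client.create_table(table)  # API request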

Create a partitioned table with the
:func:`~google.cloud.bigquery.client.Client.create_table` method:

.. literalinclude:: ../samples/create_partitioned_table.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_create_table_partitioned]
   :end-before: [END bigquery_create_table_partitioned]
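
A minimal sketch of what ``create_partitioned_table.py`` presumably contains,
based on the snippet removed from ``snippets.py`` above (dataset and table
names are illustrative):

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_partitioned_table")

    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    # Partition by day on the "date" column; expire partitions after 90 days.
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",
        expiration_ms=7776000000,  # 90 days
    )
    table = client.create_table(table)  # API request

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )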

Create a table with nested and repeated schema fields using the
:func:`~google.cloud.bigquery.client.Client.create_table` method:

.. literalinclude:: ../samples/create_table_nested_repeated_schema.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_nested_repeated_schema]
   :end-before: [END bigquery_nested_repeated_schema]
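
A minimal sketch of what ``create_table_nested_repeated_schema.py`` presumably
contains, based on the snippet removed from ``snippets.py`` above:

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_table")

    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        # A RECORD column with mode REPEATED holds zero or more nested rows.
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table = client.create_table(bigquery.Table(table_ref, schema=schema))
    print("Created table {}".format(table.full_table_id))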

Load table data from a file with the
:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:

@@ -154,6 +172,38 @@ Add an empty column to the existing table with the
   :start-after: [START bigquery_add_empty_column]
   :end-before: [END bigquery_add_empty_column]
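
A minimal sketch of adding an empty column by appending a field to a copy of
the schema and calling
:func:`~google.cloud.bigquery.client.Client.update_table` (the ``phone``
column here is illustrative):

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_table")
    table = client.get_table(table_ref)  # API request

    original_schema = table.schema
    new_schema = original_schema[:]  # copy the existing schema
    new_schema.append(bigquery.SchemaField("phone", "STRING"))

    table.schema = new_schema
    table = client.update_table(table, ["schema"])  # API request

    assert len(table.schema) == len(original_schema) + 1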

Update a schema field from required to nullable with the
:func:`~google.cloud.bigquery.client.Client.update_table` method:

.. literalinclude:: ../samples/relax_column.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_relax_column]
   :end-before: [END bigquery_relax_column]
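
A minimal sketch of what ``relax_column.py`` presumably contains, based on the
snippet removed from ``snippets.py`` above:

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_table")
    table = client.get_table(table_ref)  # API request

    # SchemaField properties cannot be edited after initialization, so
    # construct new SchemaField objects with the relaxed (NULLABLE) mode.
    relaxed_schema = [
        bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    table.schema = relaxed_schema
    table = client.update_table(table, ["schema"])  # API request

    assert all(field.mode == "NULLABLE" for field in table.schema)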

Add a nullable column to an existing table while appending data with the
:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:

.. literalinclude:: ../samples/load_table_add_column.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_add_column_load_append]
   :end-before: [END bigquery_add_column_load_append]
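
A minimal sketch of what ``load_table_add_column.py`` presumably contains,
based on the snippet removed from ``snippets.py`` above (the CSV path is
illustrative):

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_table")
    filepath = "path/to/your_file.csv"

    # Append to the destination table and allow new fields to be added.
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.schema_update_options = [
        bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION
    ]
    # The existing table has only "full_name"; the new "age" column must be
    # NULLABLE because REQUIRED fields cannot be added to an existing schema.
    job_config.schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1

    with open(filepath, "rb") as source_file:
        job = client.load_table_from_file(
            source_file, table_ref, job_config=job_config
        )  # API request
    job.result()  # Waits for the load job to complete.

    table = client.get_table(table_ref)
    print("Table now contains {} columns.".format(len(table.schema)))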


Relax a required field to nullable while appending data with the
:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:

.. literalinclude:: ../samples/load_table_relax_column.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_relax_column_load_append]
   :end-before: [END bigquery_relax_column_load_append]
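
A minimal sketch of what ``load_table_relax_column.py`` presumably contains,
based on the snippet removed from ``snippets.py`` above (the CSV path is
illustrative):

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = client.dataset("my_dataset").table("my_table")
    filepath = "path/to/your_file.csv"

    # Append to the destination table and allow REQUIRED fields to be relaxed.
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND
    job_config.schema_update_options = [
        bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION
    ]
    # The existing table has three REQUIRED fields; the data being loaded
    # omits "favorite_color", so that column is relaxed to NULLABLE.
    job_config.schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1

    with open(filepath, "rb") as source_file:
        job = client.load_table_from_file(
            source_file, table_ref, job_config=job_config
        )  # API request
    job.result()  # Waits for the load job to complete.

    table = client.get_table(table_ref)
    required = sum(field.mode == "REQUIRED" for field in table.schema)
    print("{} fields in the schema are now required.".format(required))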

Copying a Table
^^^^^^^^^^^^^^^
