Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions google/cloud/bigquery/job/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,26 @@
from google.cloud.bigquery.query import ConnectionProperty


class ColumnNameCharacterMap:
    """Indicates the character map used for column names.

    The members are plain string constants whose values match the
    corresponding REST API enum names.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap
    """

    COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED"
    """Unspecified column name character map."""

    STRICT = "STRICT"
    """Support flexible column names and reject invalid column names."""

    V1 = "V1"
    """Support alphanumeric + underscore characters; names must start with a
    letter or underscore. Invalid column names will be normalized."""

    V2 = "V2"
    """Support flexible column names. Invalid column names will be normalized."""


class LoadJobConfig(_JobConfig):
"""Configuration options for load jobs.

Expand Down Expand Up @@ -597,6 +617,27 @@ def parquet_options(self, value):
else:
self._del_sub_prop("parquetOptions")

@property
def column_name_character_map(self) -> str:
    """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]:
    Character map applied to column names in CSV/Parquet loads.

    The service default is STRICT and can be overridden by Project Config
    Service. Using this option with an unsupported load format results in
    an error. When nothing has been stored on this config, the getter
    returns ``COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED``.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map
    """
    # Fallback used when the "columnNameCharacterMap" key is absent.
    unspecified = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    return self._get_sub_prop("columnNameCharacterMap", unspecified)

@column_name_character_map.setter
def column_name_character_map(self, value: Optional[str]):
    # ``None`` is stored as the explicit "unspecified" marker; the key is
    # always written, never removed.
    unspecified = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    self._set_sub_prop(
        "columnNameCharacterMap",
        unspecified if value is None else value,
    )


class LoadJob(_AsyncJob):
"""Asynchronous job for loading data into a table.
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/job/test_load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,3 +843,42 @@ def test_parquet_options_setter_clearing(self):

config.parquet_options = None
self.assertNotIn("parquetOptions", config._properties["load"])

def test_column_name_character_map_missing(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PREFERENCE:

These tests are nearly identical. The only real change is the choice of setting and whether the outcome matches.

This feels like an ideal situation for a parameterized test.

I would recommend we shorten this code by about 27 lines by using a pytest parameterization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion! It would be really nice if we could make the tests more compact. For the purpose of this PR, I just followed the same pattern as the rest of this test file. I think we can consider revamping the tests as a whole so that their style is more consistent. I do wonder, though: we are not calling the exact same methods in each of the tests (sometimes we assign a value at initialization, sometimes by calling the setter, and sometimes by directly accessing the _properties dict), so is there a nice way to represent these cases in a test parameterization?

from google.cloud.bigquery.job.load import ColumnNameCharacterMap

config = self._get_target_class()()
self.assertEqual(
config.column_name_character_map,
ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
)

def test_column_name_character_map_hit(self):
    """The getter returns a value already present in the load properties."""
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    job_config = self._get_target_class()()
    # Write the raw API field directly, bypassing the property setter.
    load_props = job_config._properties["load"]
    load_props["columnNameCharacterMap"] = "STRICT"

    observed = job_config.column_name_character_map
    self.assertEqual(observed, ColumnNameCharacterMap.STRICT)

def test_column_name_character_map_setter(self):
    """Assigning through the property writes the raw API field."""
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    job_config = self._get_target_class()()
    job_config.column_name_character_map = "V1"

    stored = job_config._properties["load"]["columnNameCharacterMap"]
    self.assertEqual(stored, ColumnNameCharacterMap.V1)

def test_column_name_character_map_none(self):
    """Assigning ``None`` stores the explicit "unspecified" value."""
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    job_config = self._get_target_class()()
    job_config.column_name_character_map = None

    stored = job_config._properties["load"]["columnNameCharacterMap"]
    unspecified = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
    self.assertEqual(stored, unspecified)