Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class KeyResultStatementKind:


class StandardSqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in GoogleSQL.
"""

def _generate_next_value_(name, start, count, last_values):
return name

Expand All @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
ARRAY = enum.auto()
STRUCT = enum.auto()
RANGE = enum.auto()
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
FOREIGN = enum.auto()


class EntityTypes(str, enum.Enum):
Expand All @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
class SqlTypeNames(str, enum.Enum):
"""Enum of allowed SQL type names in schema.SchemaField."""
"""Enum of allowed SQL type names in schema.SchemaField.

Datatype used in Legacy SQL.
"""

STRING = "STRING"
BYTES = "BYTES"
Expand All @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
RANGE = "RANGE" # NOTE: not available in legacy types
# NOTE: FOREIGN acts as a wrapper for data types
# not natively understood by BigQuery unless translated
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can this be indented to align with the rest of the comment above it?

Copy link
Collaborator Author

@chalmerlowe chalmerlowe Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting.
When you try to indent the second line, Black (the Python formatter) moves its starting point back to the left and aligns it under the F in FOREIGN.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved both lines of comment above the object FOREIGN so that they both align together.

FOREIGN = "FOREIGN"


class WriteDisposition(object):
Expand Down Expand Up @@ -344,3 +358,9 @@ class DeterminismLevel:

NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
"""The UDF is not deterministic."""


class RoundingMode(enum.Enum):
    """Rounding mode applied when storing NUMERIC and BIGNUMERIC values.

    ``SchemaField`` serialises a member by its *name* (for example
    ``"ROUND_HALF_EVEN"``), so the member names are what callers and the
    API representation rely on.
    NOTE(review): the integer values presumably mirror the backend enum
    numbering -- confirm before depending on them.
    """

    # No mode given; treated as ROUND_HALF_AWAY_FROM_ZERO.
    ROUNDING_MODE_UNSPECIFIED = 0
    # Half values round away from zero; at scale 0: 1.4 => 1, 1.5 => 2.
    ROUND_HALF_AWAY_FROM_ZERO = 1
    # Half values round to the nearest even value; at scale 0:
    # 1.5 => 2, 2.5 => 2.
    ROUND_HALF_EVEN = 2
82 changes: 75 additions & 7 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
_isinstance_or_raise,
_get_sub_prop,
)
from google.cloud.bigquery.enums import StandardSqlTypeNames
from google.cloud.bigquery.enums import StandardSqlTypeNames, RoundingMode


_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
"STRING": StandardSqlTypeNames.STRING,
"BYTES": StandardSqlTypeNames.BYTES,
Expand All @@ -52,6 +52,7 @@
"DATE": StandardSqlTypeNames.DATE,
"TIME": StandardSqlTypeNames.TIME,
"DATETIME": StandardSqlTypeNames.DATETIME,
"FOREIGN": StandardSqlTypeNames.FOREIGN,
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""Mapping of legacy SQL type names to the corresponding StandardSqlTypeNames members."""
Expand Down Expand Up @@ -170,6 +171,34 @@ class SchemaField(object):
the type is RANGE, this field is required. Possible values for the
field element type of a RANGE include `DATE`, `DATETIME` and
`TIMESTAMP`.

rounding_mode: Union[RoundingMode, str, None]
Specifies the rounding mode to be used when storing values of
NUMERIC and BIGNUMERIC type.

Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.

ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.
For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5, 1.6, 1.7, 1.8, 1.9 => 2

ROUND_HALF_EVEN rounds half values to the nearest even value
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
values.
For Scale: 0
1.1, 1.2, 1.3, 1.4 => 1
1.5 => 2
1.6, 1.7, 1.8, 1.9 => 2
2.5 => 2

foreign_type_definition: Optional[str]
Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
"""

def __init__(
Expand All @@ -185,11 +214,12 @@ def __init__(
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
range_element_type: Union[FieldElementType, str, None] = None,
rounding_mode: Union[RoundingMode, str, None] = None,
foreign_type_definition: Optional[str] = None,
):
self._properties: Dict[str, Any] = {
"name": name,
"type": field_type,
}
self._properties: Dict[str, Any] = {}

self._properties["name"] = name
if mode is not None:
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
Expand All @@ -210,6 +240,23 @@ def __init__(
self._properties["rangeElementType"] = {"type": range_element_type}
if isinstance(range_element_type, FieldElementType):
self._properties["rangeElementType"] = range_element_type.to_api_repr()
if isinstance(rounding_mode, RoundingMode):
self._properties["roundingMode"] = rounding_mode.name
if isinstance(rounding_mode, str):
self._properties["roundingMode"] = rounding_mode
if isinstance(foreign_type_definition, str):
self._properties["foreignTypeDefinition"] = foreign_type_definition

# The order of operations is important:
# If field_type is FOREIGN, then foreign_type_definition must be set.
if field_type != "FOREIGN":
self._properties["type"] = field_type
else:
if self._properties.get("foreignTypeDefinition") is None:
raise ValueError(
"If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required."
)
self._properties["type"] = field_type

self._fields = tuple(fields)

Expand Down Expand Up @@ -251,6 +298,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
else:
element_type = None

rounding_mode = api_repr.get("roundingMode")
foreign_type_definition = api_repr.get("foreignTypeDefinition")

return cls(
field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
Expand All @@ -263,6 +313,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
scale=cls.__get_int(api_repr, "scale"),
max_length=cls.__get_int(api_repr, "maxLength"),
range_element_type=element_type,
rounding_mode=rounding_mode,
foreign_type_definition=foreign_type_definition,
)

@property
Expand Down Expand Up @@ -330,6 +382,22 @@ def range_element_type(self):
ret = self._properties.get("rangeElementType")
return FieldElementType.from_api_repr(ret)

@property
def rounding_mode(self):
    """Optional[str]: Name of the rounding mode used when storing values of
    NUMERIC and BIGNUMERIC type.

    This is the raw ``roundingMode`` entry of the field's properties, i.e.
    a string such as ``"ROUND_HALF_EVEN"`` rather than an enum member.
    Returns :data:`None` when no rounding mode has been set.
    """
    return self._properties.get("roundingMode")

@property
def foreign_type_definition(self):
"""Definition of the foreign data type.

Only valid for top-level schema fields (not nested fields).
If the type is FOREIGN, this field is required.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there any checks for this?

Copy link
Collaborator Author

@chalmerlowe chalmerlowe Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

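There is now. 😺 `SchemaField.__init__` raises a `ValueError` when the field type is `FOREIGN` and no `foreign_type_definition` is given (starting around line 250).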

"""
return self._properties.get("foreignTypeDefinition")

@property
def fields(self):
"""Optional[tuple]: Subfields contained in this field.
Expand Down
66 changes: 63 additions & 3 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from google.cloud import bigquery
from google.cloud.bigquery.enums import RoundingMode
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import (
PolicyTagList,
Expand Down Expand Up @@ -52,9 +53,12 @@ def test_constructor_defaults(self):
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
self.assertIsNone(field.default_value_expression)
self.assertEqual(field.rounding_mode, None)
self.assertEqual(field.foreign_type_definition, None)

def test_constructor_explicit(self):
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
field = self._make_one(
"test",
"STRING",
Expand All @@ -67,6 +71,8 @@ def test_constructor_explicit(self):
)
),
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
rounding_mode=ROUNDINGMODE,
foreign_type_definition="INTEGER",
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
Expand All @@ -83,9 +89,16 @@ def test_constructor_explicit(self):
)
),
)
self.assertEqual(field.rounding_mode, ROUNDINGMODE.name)
self.assertEqual(field.foreign_type_definition, "INTEGER")

def test_constructor_explicit_none(self):
field = self._make_one("test", "STRING", description=None, policy_tags=None)
field = self._make_one(
"test",
"STRING",
description=None,
policy_tags=None,
)
self.assertIsNone(field.description)
self.assertIsNone(field.policy_tags)

Expand Down Expand Up @@ -141,10 +154,18 @@ def test_to_api_repr(self):
policy.to_api_repr(),
{"names": ["foo", "bar"]},
)
ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED

field = self._make_one(
"foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
"foo",
"INTEGER",
"NULLABLE",
description="hello world",
policy_tags=policy,
rounding_mode=ROUNDINGMODE,
foreign_type_definition=None,
)
print(f"DINOSAUR: {field}\n\n{field.to_api_repr()}")
self.assertEqual(
field.to_api_repr(),
{
Expand All @@ -153,6 +174,7 @@ def test_to_api_repr(self):
"type": "INTEGER",
"description": "hello world",
"policyTags": {"names": ["foo", "bar"]},
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
},
)

Expand Down Expand Up @@ -186,6 +208,7 @@ def test_from_api_repr(self):
"description": "test_description",
"name": "foo",
"type": "record",
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
}
)
self.assertEqual(field.name, "foo")
Expand All @@ -197,6 +220,7 @@ def test_from_api_repr(self):
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
self.assertEqual(field.range_element_type, None)
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")

def test_from_api_repr_policy(self):
field = self._get_target_class().from_api_repr(
Expand Down Expand Up @@ -462,6 +486,32 @@ def test_to_standard_sql_unknown_type(self):
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
)

def test_to_standard_sql_foreign_type_valid(self):
    """A FOREIGN field that carries a definition converts to the FOREIGN kind."""
    schema_field = self._make_one(
        "some_field",
        field_type="FOREIGN",
        foreign_type_definition="INTEGER",
    )

    converted = schema_field.to_standard_sql()

    self.assertEqual(converted.name, "some_field")
    self.assertEqual(
        converted.type.type_kind,
        bigquery.StandardSqlTypeNames.FOREIGN,
    )

def test_to_standard_sql_foreign_type_invalid(self):
    """A FOREIGN field without a foreign type definition is rejected.

    NOTE(review): despite the name, the ValueError is expected from
    constructing the field, not from ``to_standard_sql()``.
    """
    legacy_type = "FOREIGN"
    foreign_type_definition = None

    with self.assertRaises(ValueError) as context:
        self._make_one(
            "some_field",
            field_type=legacy_type,
            foreign_type_definition=foreign_type_definition,
        )

    # Checked after the ``with`` block: code placed after the raising call
    # inside it would never run. assertIn reports both operands on failure,
    # unlike assertTrue(x in y).
    self.assertIn("If the 'field_type'", context.exception.args[0])

def test___eq___wrong_type(self):
field = self._make_one("test", "STRING")
other = object()
Expand Down Expand Up @@ -1117,7 +1167,17 @@ def test_to_api_repr_parameterized(field, api):


class TestForeignTypeInfo:
"""TODO: add doc string."""
"""Tests metadata re: the foreign data type definition in field schema.

Specifies the system which defines the foreign data type.

TypeSystems are external systems, such as query engines or table formats,
that have their own data types.

TypeSystem may be:
TypeSystem not specified: TYPE_SYSTEM_UNSPECIFIED
Represents Hive data types: HIVE
"""

@staticmethod
def _get_target_class():
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5928,6 +5928,7 @@ def test_external_catalog_table_options_setter(
result = table.to_api_repr()
assert result == expected


@pytest.mark.parametrize("preserve_order", [True, False])
def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order):
pytest.importorskip("pandas")
Expand Down