diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index d8cbe9969..bb594bea2 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -246,6 +246,11 @@ class KeyResultStatementKind:
 
 
 class StandardSqlTypeNames(str, enum.Enum):
+    """Enum of allowed SQL type names in schema.SchemaField.
+
+    Data types used in GoogleSQL.
+    """
+
     def _generate_next_value_(name, start, count, last_values):
         return name
 
@@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
     ARRAY = enum.auto()
     STRUCT = enum.auto()
     RANGE = enum.auto()
+    # NOTE: FOREIGN acts as a wrapper for data types
+    # not natively understood by BigQuery unless translated
+    FOREIGN = enum.auto()
 
 
 class EntityTypes(str, enum.Enum):
@@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
 # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
 # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
 class SqlTypeNames(str, enum.Enum):
-    """Enum of allowed SQL type names in schema.SchemaField."""
+    """Enum of allowed SQL type names in schema.SchemaField.
+
+    Data types used in Legacy SQL.
+    """
 
     STRING = "STRING"
     BYTES = "BYTES"
@@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
     DATETIME = "DATETIME"
     INTERVAL = "INTERVAL"  # NOTE: not available in legacy types
     RANGE = "RANGE"  # NOTE: not available in legacy types
+    # NOTE: FOREIGN acts as a wrapper for data types
+    # not natively understood by BigQuery unless translated
+    FOREIGN = "FOREIGN"
 
 
 class WriteDisposition(object):
@@ -344,3 +358,14 @@ class DeterminismLevel:
 
     NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
     """The UDF is not deterministic."""
+
+
+class RoundingMode(enum.Enum):
+    """Rounding mode used when storing NUMERIC and BIGNUMERIC values.
+
+    ROUNDING_MODE_UNSPECIFIED defaults to ROUND_HALF_AWAY_FROM_ZERO.
+    """
+
+    ROUNDING_MODE_UNSPECIFIED = 0
+    ROUND_HALF_AWAY_FROM_ZERO = 1
+    ROUND_HALF_EVEN = 2
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
index 79779b5df..cd9006d31 100644
--- a/google/cloud/bigquery/schema.py
+++ b/google/cloud/bigquery/schema.py
@@ -26,14 +26,14 @@
     _isinstance_or_raise,
     _get_sub_prop,
 )
-from google.cloud.bigquery.enums import StandardSqlTypeNames
+from google.cloud.bigquery.enums import StandardSqlTypeNames, RoundingMode
 
 
 _STRUCT_TYPES = ("RECORD", "STRUCT")
 
 # SQL types reference:
-# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
-# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
+# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
+# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
 LEGACY_TO_STANDARD_TYPES = {
     "STRING": StandardSqlTypeNames.STRING,
     "BYTES": StandardSqlTypeNames.BYTES,
@@ -52,6 +52,7 @@
     "DATE": StandardSqlTypeNames.DATE,
     "TIME": StandardSqlTypeNames.TIME,
     "DATETIME": StandardSqlTypeNames.DATETIME,
+    "FOREIGN": StandardSqlTypeNames.FOREIGN,
     # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
 }
 """String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
@@ -170,6 +171,34 @@ class SchemaField(object):
             the type is RANGE, this field is required. Possible values for the
             field element type of a RANGE include `DATE`, `DATETIME` and
             `TIMESTAMP`.
+
+        rounding_mode: Union[RoundingMode, str, None]
+            Specifies the rounding mode to be used when storing values of
+            NUMERIC and BIGNUMERIC type.
+
+            Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
+
+            ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
+            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
+            values.
+            For Scale: 0
+            1.1, 1.2, 1.3, 1.4 => 1
+            1.5, 1.6, 1.7, 1.8, 1.9 => 2
+
+            ROUND_HALF_EVEN rounds half values to the nearest even value
+            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
+            values.
+            For Scale: 0
+            1.1, 1.2, 1.3, 1.4 => 1
+            1.5 => 2
+            1.6, 1.7, 1.8, 1.9 => 2
+            2.5 => 2
+
+        foreign_type_definition: Optional[str]
+            Definition of the foreign data type.
+
+            Only valid for top-level schema fields (not nested fields).
+            If the type is FOREIGN, this field is required.
     """
 
     def __init__(
@@ -185,11 +214,12 @@ def __init__(
         scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
         max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
         range_element_type: Union[FieldElementType, str, None] = None,
+        rounding_mode: Union[RoundingMode, str, None] = None,
+        foreign_type_definition: Optional[str] = None,
     ):
-        self._properties: Dict[str, Any] = {
-            "name": name,
-            "type": field_type,
-        }
+        self._properties: Dict[str, Any] = {}
+
+        self._properties["name"] = name
         if mode is not None:
             self._properties["mode"] = mode.upper()
         if description is not _DEFAULT_VALUE:
@@ -210,6 +240,23 @@ def __init__(
             self._properties["rangeElementType"] = {"type": range_element_type}
         if isinstance(range_element_type, FieldElementType):
             self._properties["rangeElementType"] = range_element_type.to_api_repr()
+        if isinstance(rounding_mode, RoundingMode):
+            self._properties["roundingMode"] = rounding_mode.name
+        elif isinstance(rounding_mode, str):
+            self._properties["roundingMode"] = rounding_mode
+        if isinstance(foreign_type_definition, str):
+            self._properties["foreignTypeDefinition"] = foreign_type_definition
+
+        # This check must follow the "foreignTypeDefinition" assignment above:
+        # a FOREIGN field is only valid when its type definition is present.
+        if (
+            field_type == "FOREIGN"
+            and self._properties.get("foreignTypeDefinition") is None
+        ):
+            raise ValueError(
+                "If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required."
+            )
+        self._properties["type"] = field_type
 
         self._fields = tuple(fields)
 
@@ -251,6 +298,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
         else:
             element_type = None
 
+        rounding_mode = api_repr.get("roundingMode")
+        foreign_type_definition = api_repr.get("foreignTypeDefinition")
+
         return cls(
             field_type=field_type,
             fields=[cls.from_api_repr(f) for f in fields],
@@ -263,6 +313,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
             scale=cls.__get_int(api_repr, "scale"),
             max_length=cls.__get_int(api_repr, "maxLength"),
             range_element_type=element_type,
+            rounding_mode=rounding_mode,
+            foreign_type_definition=foreign_type_definition,
         )
 
     @property
@@ -330,6 +382,22 @@ def range_element_type(self):
             ret = self._properties.get("rangeElementType")
             return FieldElementType.from_api_repr(ret)
 
+    @property
+    def rounding_mode(self):
+        """Optional[str]: The rounding mode to be used when storing values of
+        NUMERIC and BIGNUMERIC type.
+        """
+        return self._properties.get("roundingMode")
+
+    @property
+    def foreign_type_definition(self):
+        """Optional[str]: Definition of the foreign data type.
+
+        Only valid for top-level schema fields (not nested fields).
+        If the type is FOREIGN, this field is required.
+        """
+        return self._properties.get("foreignTypeDefinition")
+
     @property
     def fields(self):
         """Optional[tuple]: Subfields contained in this field.
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 61b1b6997..806b86672 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from google.cloud import bigquery
+from google.cloud.bigquery.enums import RoundingMode
 from google.cloud.bigquery.standard_sql import StandardSqlStructType
 from google.cloud.bigquery.schema import (
     PolicyTagList,
@@ -52,9 +53,12 @@ def test_constructor_defaults(self):
         self.assertEqual(field.fields, ())
         self.assertIsNone(field.policy_tags)
         self.assertIsNone(field.default_value_expression)
+        self.assertIsNone(field.rounding_mode)
+        self.assertIsNone(field.foreign_type_definition)
 
     def test_constructor_explicit(self):
         FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
+        ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
         field = self._make_one(
             "test",
             "STRING",
@@ -67,6 +71,8 @@ def test_constructor_explicit(self):
                 )
             ),
             default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
+            rounding_mode=ROUNDINGMODE,
+            foreign_type_definition="INTEGER",
         )
         self.assertEqual(field.name, "test")
         self.assertEqual(field.field_type, "STRING")
@@ -83,9 +89,16 @@ def test_constructor_explicit(self):
                 )
             ),
         )
+        self.assertEqual(field.rounding_mode, ROUNDINGMODE.name)
+        self.assertEqual(field.foreign_type_definition, "INTEGER")
 
     def test_constructor_explicit_none(self):
-        field = self._make_one("test", "STRING", description=None, policy_tags=None)
+        field = self._make_one(
+            "test",
+            "STRING",
+            description=None,
+            policy_tags=None,
+        )
         self.assertIsNone(field.description)
         self.assertIsNone(field.policy_tags)
 
@@ -141,10 +154,17 @@ def test_to_api_repr(self):
             policy.to_api_repr(),
             {"names": ["foo", "bar"]},
         )
+        ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
 
         field = self._make_one(
-            "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
+            "foo",
+            "INTEGER",
+            "NULLABLE",
+            description="hello world",
+            policy_tags=policy,
+            rounding_mode=ROUNDINGMODE,
+            foreign_type_definition=None,
         )
         self.assertEqual(
             field.to_api_repr(),
             {
@@ -153,6 +173,7 @@ def test_to_api_repr(self):
                 "type": "INTEGER",
                 "description": "hello world",
                 "policyTags": {"names": ["foo", "bar"]},
+                "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
             },
         )
 
@@ -186,6 +207,7 @@ def test_from_api_repr(self):
                 "description": "test_description",
                 "name": "foo",
                 "type": "record",
+                "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
             }
         )
         self.assertEqual(field.name, "foo")
@@ -197,6 +219,7 @@ def test_from_api_repr(self):
         self.assertEqual(field.fields[0].field_type, "INTEGER")
         self.assertEqual(field.fields[0].mode, "NULLABLE")
         self.assertEqual(field.range_element_type, None)
+        self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
 
     def test_from_api_repr_policy(self):
         field = self._get_target_class().from_api_repr(
@@ -462,6 +485,32 @@ def test_to_standard_sql_unknown_type(self):
             bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
         )
 
+    def test_to_standard_sql_foreign_type_valid(self):
+        legacy_type = "FOREIGN"
+        standard_type = bigquery.StandardSqlTypeNames.FOREIGN
+        foreign_type_definition = "INTEGER"
+
+        field = self._make_one(
+            "some_field",
+            field_type=legacy_type,
+            foreign_type_definition=foreign_type_definition,
+        )
+        standard_field = field.to_standard_sql()
+        self.assertEqual(standard_field.name, "some_field")
+        self.assertEqual(standard_field.type.type_kind, standard_type)
+
+    def test_to_standard_sql_foreign_type_invalid(self):
+        legacy_type = "FOREIGN"
+        foreign_type_definition = None
+
+        with self.assertRaises(ValueError) as context:
+            self._make_one(
+                "some_field",
+                field_type=legacy_type,
+                foreign_type_definition=foreign_type_definition,
+            )
+        self.assertIn("If the 'field_type'", context.exception.args[0])
+
     def test___eq___wrong_type(self):
         field = self._make_one("test", "STRING")
         other = object()
@@ -1117,7 +1166,17 @@ def test_to_api_repr_parameterized(field, api):
 
 
 class TestForeignTypeInfo:
-    """TODO: add doc string."""
+    """Tests metadata re: the foreign data type definition in field schema.
+
+    Specifies the system which defines the foreign data type.
+
+    TypeSystems are external systems, such as query engines or table formats,
+    that have their own data types.
+
+    TypeSystem may be:
+        TypeSystem not specified: TYPE_SYSTEM_UNSPECIFIED
+        Represents Hive data types: HIVE
+    """
 
     @staticmethod
     def _get_target_class():
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 9c872be9e..023510731 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -5928,6 +5928,7 @@ def test_external_catalog_table_options_setter(
         result = table.to_api_repr()
         assert result == expected
 
+
 @pytest.mark.parametrize("preserve_order", [True, False])
 def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order):
     pytest.importorskip("pandas")