Skip to content

Commit 15d4bb6

Browse files
plamut and tswast
authored and committed
feat(bigquery): add support for hive partitioning options configuration (#9626)
* feat(bigquery): add hive partitioning options to external config * Mark ExternalConfig.options property as optional * Support hive partitioning options in LoadJobConfig * Mark hive partitioning class and properties as beta
1 parent 0b69ee0 commit 15d4bb6

File tree

4 files changed

+213
-1
lines changed

4 files changed

+213
-1
lines changed

bigquery/google/cloud/bigquery/external_config.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,76 @@ def from_api_repr(cls, resource):
543543
_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)
544544

545545

546+
class HivePartitioningOptions(object):
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self):
        # Backing store in API (camelCase) form; the properties below read
        # and write this dictionary directly.
        self._properties = {}

    @property
    def mode(self):
        """Optional[str]: When set, what mode of hive partitioning to use when reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a common prefix for
        all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    def to_api_repr(self):
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        # Deep copy so callers cannot mutate this object's state through the
        # returned dictionary.
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        # Deep copy so later changes to ``resource`` do not leak into the
        # new instance.
        config._properties = copy.deepcopy(resource)
        return config
614+
615+
546616
class ExternalConfig(object):
547617
"""Description of an external data source.
548618
@@ -571,7 +641,7 @@ def source_format(self):
571641

572642
@property
573643
def options(self):
574-
"""Dict[str, Any]: Source-specific options."""
644+
"""Optional[Dict[str, Any]]: Source-specific options."""
575645
return self._options
576646

577647
@property
@@ -601,6 +671,28 @@ def compression(self):
601671
def compression(self, value):
602672
self._properties["compression"] = value
603673

674+
@property
def hive_partitioning(self):
    """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
    it configures hive partitioning support.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
    """
    prop = self._properties.get("hivePartitioningOptions")
    if prop is None:
        return None
    # A new options object is built from the stored resource on each access.
    return HivePartitioningOptions.from_api_repr(prop)

@hive_partitioning.setter
def hive_partitioning(self, value):
    # ``None`` is stored as-is; any other value is converted through its
    # ``to_api_repr()`` method before being stored.
    prop = value.to_api_repr() if value is not None else None
    self._properties["hivePartitioningOptions"] = prop
695+
604696
@property
605697
def ignore_unknown_values(self):
606698
"""bool: If :data:`True`, extra values that are not represented in the

bigquery/google/cloud/bigquery/job.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from google.cloud.bigquery.dataset import DatasetReference
3030
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
3131
from google.cloud.bigquery.external_config import ExternalConfig
32+
from google.cloud.bigquery.external_config import HivePartitioningOptions
3233
from google.cloud.bigquery import _helpers
3334
from google.cloud.bigquery.query import _query_param_from_api_repr
3435
from google.cloud.bigquery.query import ArrayQueryParameter
@@ -1138,6 +1139,33 @@ def field_delimiter(self):
11381139
def field_delimiter(self, value):
11391140
self._set_sub_prop("fieldDelimiter", value)
11401141

1142+
@property
def hive_partitioning(self):
    """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
    it configures hive partitioning support.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options
    """
    prop = self._get_sub_prop("hivePartitioningOptions")
    if prop is None:
        return None
    # A new options object is built from the stored resource on each access.
    return HivePartitioningOptions.from_api_repr(prop)

@hive_partitioning.setter
def hive_partitioning(self, value):
    # Only ``None`` or a HivePartitioningOptions instance is accepted;
    # anything else (including a plain dict) raises TypeError.
    if value is not None:
        if isinstance(value, HivePartitioningOptions):
            value = value.to_api_repr()
        else:
            raise TypeError("Expected a HivePartitioningOptions instance or None.")

    self._set_sub_prop("hivePartitioningOptions", value)
1168+
11411169
@property
11421170
def ignore_unknown_values(self):
11431171
"""bool: Ignore extra values not represented in the table schema.

bigquery/tests/unit/test_external_config.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,58 @@ def test_to_api_repr_sheets(self):
173173

174174
self.assertEqual(got_resource, exp_resource)
175175

176+
def test_from_api_repr_hive_partitioning(self):
    # Parse a resource carrying hive partitioning options, check the parsed
    # values, then confirm the API representation round-trips unchanged.
    resource = _copy_and_update(
        self.BASE_RESOURCE,
        {
            "sourceFormat": "FORMAT_FOO",
            "hivePartitioningOptions": {
                "sourceUriPrefix": "http://foo/bar",
                "mode": "STRINGS",
            },
        },
    )

    ec = external_config.ExternalConfig.from_api_repr(resource)

    self._verify_base(ec)
    self.assertEqual(ec.source_format, "FORMAT_FOO")
    self.assertIsInstance(
        ec.hive_partitioning, external_config.HivePartitioningOptions
    )
    self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar")
    self.assertEqual(ec.hive_partitioning.mode, "STRINGS")

    # converting back to API representation should yield the same result
    got_resource = ec.to_api_repr()
    self.assertEqual(got_resource, resource)

    # Without the options key the property must read back as None.
    del resource["hivePartitioningOptions"]
    ec = external_config.ExternalConfig.from_api_repr(resource)
    self.assertIsNone(ec.hive_partitioning)

    got_resource = ec.to_api_repr()
    self.assertEqual(got_resource, resource)
208+
209+
def test_to_api_repr_hive_partitioning(self):
    # Options assigned through the property must appear under the
    # "hivePartitioningOptions" key of the API representation.
    hive_partitioning = external_config.HivePartitioningOptions()
    hive_partitioning.source_uri_prefix = "http://foo/bar"
    hive_partitioning.mode = "STRINGS"

    ec = external_config.ExternalConfig("FORMAT_FOO")
    ec.hive_partitioning = hive_partitioning

    got_resource = ec.to_api_repr()

    expected_resource = {
        "sourceFormat": "FORMAT_FOO",
        "hivePartitioningOptions": {
            "sourceUriPrefix": "http://foo/bar",
            "mode": "STRINGS",
        },
    }
    self.assertEqual(got_resource, expected_resource)
227+
176228
def test_from_api_repr_csv(self):
177229
resource = _copy_and_update(
178230
self.BASE_RESOURCE,

bigquery/tests/unit/test_job.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,46 @@ def test_field_delimiter_setter(self):
14481448
config.field_delimiter = field_delimiter
14491449
self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter)
14501450

1451+
def test_hive_partitioning_missing(self):
    # A freshly constructed config has no hive partitioning options.
    config = self._get_target_class()()
    self.assertIsNone(config.hive_partitioning)
1454+
1455+
def test_hive_partitioning_hit(self):
    # Options stored directly in the raw "load" properties must be exposed
    # as a HivePartitioningOptions instance with matching values.
    from google.cloud.bigquery.external_config import HivePartitioningOptions

    config = self._get_target_class()()
    config._properties["load"]["hivePartitioningOptions"] = {
        "sourceUriPrefix": "http://foo/bar",
        "mode": "STRINGS",
    }
    result = config.hive_partitioning
    self.assertIsInstance(result, HivePartitioningOptions)
    self.assertEqual(result.source_uri_prefix, "http://foo/bar")
    self.assertEqual(result.mode, "STRINGS")
1467+
1468+
def test_hive_partitioning_setter(self):
    # Assigning an options object stores its API representation; assigning
    # None stores None under the same key.
    from google.cloud.bigquery.external_config import HivePartitioningOptions

    hive_partitioning = HivePartitioningOptions()
    hive_partitioning.source_uri_prefix = "http://foo/bar"
    hive_partitioning.mode = "AUTO"

    config = self._get_target_class()()
    config.hive_partitioning = hive_partitioning
    self.assertEqual(
        config._properties["load"]["hivePartitioningOptions"],
        {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"},
    )

    config.hive_partitioning = None
    self.assertIsNone(config._properties["load"]["hivePartitioningOptions"])
1484+
1485+
def test_hive_partitioning_invalid_type(self):
    # A plain dict is rejected by the setter with TypeError.
    config = self._get_target_class()()

    with self.assertRaises(TypeError):
        config.hive_partitioning = {"mode": "AUTO"}
1490+
14511491
def test_ignore_unknown_values_missing(self):
14521492
config = self._get_target_class()()
14531493
self.assertIsNone(config.ignore_unknown_values)

0 commit comments

Comments
 (0)