From d3afd2ab6e61bbdaa43af026375fc3e3d3b4dad4 Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Wed, 27 Oct 2021 11:48:23 +0300
Subject: [PATCH 01/18] process: add mypy type checks to nox sessions

---
 noxfile.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index d41573407..64dd3a9cc 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -22,6 +22,7 @@
 
 import nox
 
+MYPY_VERSION = "mypy==0.910"
 PYTYPE_VERSION = "pytype==2021.4.9"
 BLACK_VERSION = "black==19.10b0"
 BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py")
@@ -108,9 +109,24 @@ def unit_noextras(session):
     default(session, install_extras=False)
 
 
+@nox.session(python=DEFAULT_PYTHON_VERSION)
+def mypy(session):
+    """Run type checks with mypy."""
+    session.install("-e", ".[all]")
+    session.install("ipython")
+    session.install(MYPY_VERSION)
+
+    # Just install the dependencies' type info directly, since "mypy --install-types"
+    # might require an additional pass.
+    session.install(
+        "types-protobuf", "types-python-dateutil", "types-requests", "types-setuptools",
+    )
+    session.run("mypy", "google/cloud")
+
+
 @nox.session(python=DEFAULT_PYTHON_VERSION)
 def pytype(session):
-    """Run type checks."""
+    """Run type checks with pytype."""
     # An indirect dependency attrs==21.1.0 breaks the check, and installing a less
     # recent version avoids the error until a possibly better fix is found.
     # https://github.com/googleapis/python-bigquery/issues/655

From d9ca3ec07cc4e246c580b1dea67c544a03f328aa Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Wed, 27 Oct 2021 16:57:31 +0300
Subject: [PATCH 02/18] Ignore type errors for unannotated modules

Several dependencies lack type annotations, or they don't advertise
themselves as type-annotated. We do not want `mypy` to complain about
these.
---
 google/cloud/bigquery/_helpers.py              |  4 ++--
 google/cloud/bigquery/_pandas_helpers.py       |  8 ++++----
 google/cloud/bigquery/_tqdm_helpers.py         |  2 +-
 google/cloud/bigquery/client.py                | 12 ++++++------
 google/cloud/bigquery/dataset.py               |  2 +-
 google/cloud/bigquery/dbapi/cursor.py          |  2 +-
 google/cloud/bigquery/job/base.py              |  4 ++--
 google/cloud/bigquery/job/query.py             | 10 +++++-----
 google/cloud/bigquery/magics/magics.py         | 14 +++++++-------
 google/cloud/bigquery/model.py                 |  4 ++--
 google/cloud/bigquery/opentelemetry_tracing.py |  2 +-
 google/cloud/bigquery/retry.py                 |  4 ++--
 google/cloud/bigquery/routine/routine.py       |  2 +-
 google/cloud/bigquery/table.py                 | 14 +++++++-------
 14 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index e95d38545..e2ca7fa07 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -22,7 +22,7 @@
 from typing import Any, Optional, Union
 
 from dateutil import relativedelta
-from google.cloud._helpers import UTC
+from google.cloud._helpers import UTC  # type: ignore
 from google.cloud._helpers import _date_from_iso8601_date
 from google.cloud._helpers import _datetime_from_microseconds
 from google.cloud._helpers import _RFC3339_MICROS
@@ -126,7 +126,7 @@ def __init__(self):
     def installed_version(self) -> packaging.version.Version:
         """Return the parsed version of pyarrow."""
         if self._installed_version is None:
-            import pyarrow
+            import pyarrow  # type: ignore
 
             self._installed_version = packaging.version.parse(
                 # Use 0.0.0, since it is earlier than any released version.
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 0cb851469..de6356c2a 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -21,7 +21,7 @@
 import warnings
 
 try:
-    import pandas
+    import pandas  # type: ignore
 except ImportError:  # pragma: NO COVER
     pandas = None
 else:
@@ -29,7 +29,7 @@
 
 try:
     # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
-    from shapely.geometry.base import BaseGeometry as _BaseGeometry
+    from shapely.geometry.base import BaseGeometry as _BaseGeometry  # type: ignore
 except ImportError:  # pragma: NO COVER
     # No shapely, use NoneType for _BaseGeometry as a placeholder.
     _BaseGeometry = type(None)
@@ -43,7 +43,7 @@ def _to_wkb():
     # - Avoid extra work done by `shapely.wkb.dumps` that we don't need.
     # - Caches the WKBWriter (and write method lookup :) )
     # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace.
-    from shapely.geos import WKBWriter, lgeos
+    from shapely.geos import WKBWriter, lgeos  # type: ignore
 
     write = WKBWriter(lgeos).write
     notnull = pandas.notnull
@@ -574,7 +574,7 @@ def dataframe_to_parquet(
     """
     pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
 
-    import pyarrow.parquet
+    import pyarrow.parquet  # type: ignore
 
     kwargs = (
         {"use_compliant_nested_type": parquet_use_compliant_nested_type}
diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py
index 99e720e2b..632f70f87 100644
--- a/google/cloud/bigquery/_tqdm_helpers.py
+++ b/google/cloud/bigquery/_tqdm_helpers.py
@@ -21,7 +21,7 @@
 import warnings
 
 try:
-    import tqdm
+    import tqdm  # type: ignore
 except ImportError:  # pragma: NO COVER
     tqdm = None
 
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 9cb6af8f0..e2f71db08 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -33,17 +33,17 @@
 import warnings
 
 from google import resumable_media  # type: ignore
-from google.resumable_media.requests import MultipartUpload
+from google.resumable_media.requests import MultipartUpload  # type: ignore
 from google.resumable_media.requests import ResumableUpload
 
-import google.api_core.client_options
-import google.api_core.exceptions as core_exceptions
-from google.api_core.iam import Policy
+import google.api_core.client_options  # type: ignore
+import google.api_core.exceptions as core_exceptions  # type: ignore
+from google.api_core.iam import Policy  # type: ignore
 from google.api_core import page_iterator
 from google.api_core import retry as retries
-import google.cloud._helpers
+import google.cloud._helpers  # type: ignore
 from google.cloud import exceptions  # pytype: disable=import-error
-from google.cloud.client import ClientWithProject  # pytype: disable=import-error
+from google.cloud.client import ClientWithProject  # type: ignore  # pytype: disable=import-error
 
 try:
     from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py
index 21e56f305..ff015d605 100644
--- a/google/cloud/bigquery/dataset.py
+++ b/google/cloud/bigquery/dataset.py
@@ -18,7 +18,7 @@
 
 import copy
 
-import google.cloud._helpers
+import google.cloud._helpers  # type: ignore
 
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery.model import ModelReference
diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py
index b1239ff57..03f3b72ca 100644
---
a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -31,7 +31,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions -import google.cloud.exceptions +import google.cloud.exceptions # type: ignore _LOGGER = logging.getLogger(__name__) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 88d6bec14..5cc086ad8 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -21,8 +21,8 @@ import typing from typing import Dict, Optional, Sequence -from google.api_core import exceptions -import google.api_core.future.polling +from google.api_core import exceptions # type: ignore +import google.api_core.future.polling # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 942c85fc3..b85b34203 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -20,8 +20,8 @@ import typing from typing import Any, Dict, Iterable, List, Optional, Union -from google.api_core import exceptions -from google.api_core.future import polling as polling_future +from google.api_core import exceptions # type: ignore +from google.api_core.future import polling as polling_future # type: ignore import requests from google.cloud.bigquery.dataset import Dataset @@ -56,9 +56,9 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. - import pandas - import geopandas - import pyarrow + import pandas # type: ignore + import geopandas # type: ignore + import pyarrow # type: ignore from google.api_core import retry as retries from google.cloud import bigquery_storage from google.cloud.bigquery.client import Client diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index d368bbeaa..6c358c4d7 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -156,16 +156,16 @@ from concurrent import futures try: - import IPython - from IPython import display - from IPython.core import magic_arguments + import IPython # type: ignore + from IPython import display # type: ignore + from IPython.core import magic_arguments # type: ignore except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") -from google.api_core import client_info +from google.api_core import client_info # type: ignore from google.api_core import client_options -from google.api_core.exceptions import NotFound -import google.auth +from google.api_core.exceptions import NotFound # type: ignore +import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset from google.cloud.bigquery.dbapi import _helpers @@ -784,7 +784,7 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): raise customized_error from err try: - from google.api_core.gapic_v1 import client_info as gapic_client_info + from google.api_core.gapic_v1 import client_info as gapic_client_info # type: ignore except ImportError as err: customized_error = ImportError( "Install the grpcio package to use the BigQuery Storage API." 
diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 2d3f6660f..cdb411e08 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -20,8 +20,8 @@ from google.protobuf import json_format -import google.cloud._helpers -from google.api_core import datetime_helpers +import google.cloud._helpers # type: ignore +from google.api_core import datetime_helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery_v2 import types from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 57f258ac4..3469710ad 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -14,7 +14,7 @@ import logging from contextlib import contextmanager -from google.api_core.exceptions import GoogleAPICallError +from google.api_core.exceptions import GoogleAPICallError # type: ignore logger = logging.getLogger(__name__) try: diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 8a86973cd..c7aefe23e 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.api_core import exceptions +from google.api_core import exceptions # type: ignore from google.api_core import retry -from google.auth import exceptions as auth_exceptions +from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index a776212c3..a66434300 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -18,7 +18,7 @@ from google.protobuf import json_format -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types from google.cloud.bigquery_v2.types import StandardSqlTableType diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 608218fdc..427908a80 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -25,33 +25,33 @@ import warnings try: - import pandas + import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None try: - import geopandas + import geopandas # type: ignore except ImportError: geopandas = None else: _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: - import shapely.geos + import shapely.geos # type: ignore except ImportError: shapely = None else: _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read try: - import pyarrow + import pyarrow # type: ignore except ImportError: # pragma: NO COVER pyarrow = None -import google.api_core.exceptions -from google.api_core.page_iterator import HTTPIterator +import google.api_core.exceptions # type: ignore +from google.api_core.page_iterator import HTTPIterator # type: ignore -import google.cloud._helpers +import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError From 8ba85955ea3ba6d9ff424cca59f7726c4153001b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 27 Oct 2021 19:00:05 +0300 Subject: [PATCH 03/18] Fix mypy complaints (batch 1) 
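
A recurring fix in this batch is binding converted values to a new name
(e.g. `type_` becomes `result_type` in `dbapi/_helpers.py`): mypy infers
a single type for each name, so assigning a value of a different type
back to an annotated parameter is reported as an error. A minimal sketch
of the pattern, illustrative only and not taken from this diff:

    def stringify(value: int) -> str:
        value = str(value)  # mypy: incompatible types in assignment
        return value

    def stringify_fixed(value: int) -> str:
        new_value = str(value)  # a fresh name keeps each type stable
        return new_value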
--- google/cloud/__init__.py | 5 ++++- google/cloud/bigquery/_http.py | 2 +- google/cloud/bigquery/dbapi/_helpers.py | 16 +++++++++++----- .../bigquery/magics/line_arg_parser/lexer.py | 2 +- google/cloud/bigquery/opentelemetry_tracing.py | 4 ++-- google/cloud/bigquery/schema.py | 4 ++-- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py index 8fcc60e2b..1fcfd4f50 100644 --- a/google/cloud/__init__.py +++ b/google/cloud/__init__.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterable + try: import pkg_resources @@ -21,4 +23,5 @@ except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) + # Use Iterable instead of List, otherwise pytype (incorrectly) complains + __path__: Iterable[str] = pkgutil.extend_path(__path__, __name__) diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index 81e7922e6..f7207f32e 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -17,7 +17,7 @@ import os import pkg_resources -from google.cloud import _http # pytype: disable=import-error +from google.cloud import _http # type: ignore # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 72e711bcf..e5c7ef7ec 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -161,7 +161,7 @@ def _parse_struct_fields( yield m.group(1, 2) -SCALAR, ARRAY, STRUCT = "sar" +SCALAR, ARRAY, STRUCT = ("s", "a", "r") def _parse_type( @@ -226,19 +226,19 @@ def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: s type_type, sub_type = _parse_type(type_, name, base) if type_type == SCALAR: - type_ = sub_type + result_type = sub_type elif type_type == ARRAY: - type_ = query.ArrayQueryParameterType(sub_type, name=name) + result_type = query.ArrayQueryParameterType(sub_type, name=name) elif type_type == STRUCT: fields = [ complex_query_parameter_type(field_name, field_type, base) for field_name, field_type in sub_type ] - type_ = query.StructQueryParameterType(*fields, name=name) + result_type = query.StructQueryParameterType(*fields, name=name) else: # pragma: NO COVER raise AssertionError("Bad type_type", type_type) # Can't happen :) - return type_ + return result_type def complex_query_parameter( @@ -256,6 +256,12 @@ def complex_query_parameter( struct>> """ + param: typing.Union[ + query.ScalarQueryParameter, + query.ArrayQueryParameter, + query.StructQueryParameter, + ] + base = base or type_ type_type, sub_type = _parse_type(type_, name, base) diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index cd809c389..71b287d01 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -98,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types +TokenType = AutoStrEnum( # type: ignore # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 3469710ad..7978a5ca3 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ 
b/google/cloud/bigquery/opentelemetry_tracing.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) try: from opentelemetry import trace - from opentelemetry.instrumentation.utils import http_status_to_canonical_code + from opentelemetry.instrumentation.utils import http_status_to_status_code from opentelemetry.trace.status import Status HAS_OPENTELEMETRY = True @@ -81,7 +81,7 @@ def create_span(name, attributes=None, client=None, job_ref=None): yield span except GoogleAPICallError as error: if error.code is not None: - span.set_status(Status(http_status_to_canonical_code(error.code))) + span.set_status(Status(http_status_to_status_code(error.code))) raise diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 5bad52273..6ebfbdc40 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,7 +16,7 @@ import collections import enum -from typing import Iterable, Union +from typing import Any, Dict, Iterable, Union from google.cloud.bigquery_v2 import types @@ -106,7 +106,7 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): - self._properties = { + self._properties: Dict[str, Any] = { "name": name, "type": field_type, } From 31ce9668c7249bb352d7e5972fc01afee4e375c2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 28 Oct 2021 16:28:07 +0300 Subject: [PATCH 04/18] Fix mypy complaints (batch 2) --- google/cloud/bigquery/client.py | 148 +++++++++++++++++--------------- google/cloud/bigquery/table.py | 4 +- 2 files changed, 82 insertions(+), 70 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e2f71db08..50612224d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -28,7 +28,18 @@ import math import os import tempfile -from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union +import typing +from typing import ( + Any, + BinaryIO, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Union, +) import uuid import warnings @@ -100,6 +111,7 @@ pyarrow = _helpers.PYARROW_VERSIONS.try_import() +PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 @@ -248,7 +260,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -295,7 +307,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -361,7 +373,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -400,7 +412,7 @@ def list_datasets( Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. associated with the project. 
""" - extra_params = {} + extra_params: Dict[str, Any] = {} if project is None: project = self.project if include_all: @@ -531,7 +543,7 @@ def _ensure_bqstorage_client( return bqstorage_client - def _dataset_from_arg(self, dataset): + def _dataset_from_arg(self, dataset) -> Union[Dataset, DatasetReference]: if isinstance(dataset, str): dataset = DatasetReference.from_string( dataset, default_project=self.project @@ -552,7 +564,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference, DatasetListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -627,7 +639,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. @@ -682,7 +694,7 @@ def create_table( table: Union[str, Table, TableReference, TableListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -765,7 +777,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -809,7 +821,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -839,7 +851,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -872,7 +884,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -897,7 +909,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -940,7 +952,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -984,7 +996,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. 
@@ -1026,7 +1038,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1073,7 +1085,7 @@ def update_dataset( """ partial = dataset._build_resource(fields) if dataset.etag is not None: - headers = {"If-Match": dataset.etag} + headers: Optional[Dict[str, str]] = {"If-Match": dataset.etag} else: headers = None path = dataset.path @@ -1096,7 +1108,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1137,7 +1149,7 @@ def update_model( """ partial = model._build_resource(fields) if model.etag: - headers = {"If-Match": model.etag} + headers: Optional[Dict[str, str]] = {"If-Match": model.etag} else: headers = None path = model.path @@ -1160,7 +1172,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1207,7 +1219,7 @@ def update_routine( """ partial = routine._build_resource(fields) if routine.etag: - headers = {"If-Match": routine.etag} + headers: Optional[Dict[str, str]] = {"If-Match": routine.etag} else: headers = None @@ -1234,7 +1246,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1275,7 +1287,7 @@ def update_table( """ partial = table._build_resource(fields) if table.etag is not None: - headers = {"If-Match": table.etag} + headers: Optional[Dict[str, str]] = {"If-Match": table.etag} else: headers = None @@ -1300,7 +1312,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1377,7 +1389,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1454,7 +1466,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1529,7 +1541,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, DatasetListItem, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. 
@@ -1588,7 +1600,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1642,7 +1654,7 @@ def delete_job_metadata( project: Optional[str] = None, location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1705,7 +1717,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. @@ -1759,7 +1771,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1812,7 +1824,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1837,7 +1849,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {"maxResults": 0} + extra_params: Dict[str, Any] = {"maxResults": 0} if timeout is not None: timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) @@ -1901,7 +1913,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1934,7 +1946,7 @@ def create_job( return self.load_table_from_uri( source_uris, destination, - job_config=load_job_config, + job_config=typing.cast(LoadJobConfig, load_job_config), retry=retry, timeout=timeout, ) @@ -1954,7 +1966,7 @@ def create_job( return self.copy_table( sources, destination, - job_config=copy_job_config, + job_config=typing.cast(CopyJobConfig, copy_job_config), retry=retry, timeout=timeout, ) @@ -1974,7 +1986,7 @@ def create_job( return self.extract_table( source, destination_uris, - job_config=extract_job_config, + job_config=typing.cast(ExtractJobConfig, extract_job_config), retry=retry, timeout=timeout, source_type=source_type, @@ -1987,7 +1999,10 @@ def create_job( ) query = _get_sub_prop(copy_config, ["query", "query"]) return self.query( - query, job_config=query_job_config, retry=retry, timeout=timeout + query, + job_config=typing.cast(QueryJobConfig, query_job_config), + retry=retry, + timeout=timeout, ) else: raise TypeError("Invalid job configuration received.") @@ -1998,7 +2013,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. 
@@ -2072,7 +2087,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2149,7 +2164,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2264,7 +2279,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2348,7 +2363,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2451,7 +2466,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2591,7 +2606,7 @@ def load_table_from_dataframe( try: table = self.get_table(destination) except core_exceptions.NotFound: - table = None + pass else: columns_and_indexes = frozenset( name @@ -2706,7 +2721,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2989,7 +3004,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3092,7 +3107,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. @@ -3190,7 +3205,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3347,7 +3362,7 @@ def insert_rows( table: Union[Table, TableReference, str], rows: Union[Iterable[Tuple], Iterable[Dict]], selected_fields: Sequence[SchemaField] = None, - **kwargs: dict, + **kwargs, ) -> Sequence[dict]: """Insert rows into a table via the streaming API. @@ -3472,7 +3487,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. 
@@ -3539,8 +3554,8 @@ def insert_rows_json( # insert_rows_json doesn't need the table schema. It's not doing any # type conversions. table = _table_arg_to_table_ref(table, default_project=self.project) - rows_info = [] - data = {"rows": rows_info} + rows_info: List[Any] = [] + data: Dict[str, Any] = {"rows": rows_info} if row_ids is None: warnings.warn( @@ -3558,7 +3573,7 @@ def insert_rows_json( raise TypeError(msg) for i, row in enumerate(json_rows): - info = {"json": row} + info: Dict[str, Any] = {"json": row} if row_ids is AutoRowIDs.GENERATE_UUID: info["insertId"] = str(uuid.uuid4()) @@ -3607,7 +3622,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3657,7 +3672,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3733,7 +3748,7 @@ def list_rows( table = self.get_table(table.reference, retry=retry, timeout=timeout) schema = table.schema - params = {} + params: Dict[str, Any] = {} if selected_fields is not None: params["selectedFields"] = ",".join(field.name for field in selected_fields) if start_index is not None: @@ -3769,7 +3784,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = DEFAULT_TIMEOUT, + timeout: Optional[float] = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See @@ -3814,7 +3829,7 @@ def _list_rows_from_query_results( Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s. """ - params = { + params: Dict[str, Any] = { "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": location, } @@ -3855,7 +3870,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: Union[str, BinaryIO]): + def schema_from_json(self, file_or_path: PathType): """Takes a file object or file path that contains json that describes a table schema. @@ -3868,9 +3883,7 @@ def schema_from_json(self, file_or_path: Union[str, BinaryIO]): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json( - self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] - ): + def schema_to_json(self, schema_list: Sequence[SchemaField], destination: PathType): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. @@ -4011,13 +4024,12 @@ def _extract_job_reference(job, project=None, location=None): return (project, location, job_id) -def _make_job_id(job_id, prefix=None): +def _make_job_id(job_id: Optional[str], prefix: Optional[str] = None) -> str: """Construct an ID for a new job. Args: - job_id (Optional[str]): the user-provided job ID. - - prefix (Optional[str]): the user-provided prefix for a job ID. + job_id: the user-provided job ID. + prefix: the user-provided prefix for a job ID. 
Returns: str: A job ID diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 427908a80..6cc651ff2 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2657,7 +2657,7 @@ def _rows_page_start(iterator, page, response): # pylint: enable=unused-argument -def _table_arg_to_table_ref(value, default_project=None): +def _table_arg_to_table_ref(value, default_project=None) -> TableReference: """Helper to convert a string or Table to TableReference. This function keeps TableReference and other kinds of objects unchanged. @@ -2669,7 +2669,7 @@ def _table_arg_to_table_ref(value, default_project=None): return value -def _table_arg_to_table(value, default_project=None): +def _table_arg_to_table(value, default_project=None) -> Table: """Helper to convert a string or TableReference to a Table. This function keeps Table and other kinds of objects unchanged. From 2ba8cee68037041e3371174f01f6628ed94e2809 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 28 Oct 2021 18:05:04 +0300 Subject: [PATCH 05/18] Fix mypy complaints (batch 3) --- google/cloud/__init__.py | 5 +--- google/cloud/bigquery/external_config.py | 29 ++++++++++++++---------- google/cloud/bigquery/job/base.py | 2 +- google/cloud/bigquery/job/query.py | 23 ++++++++----------- google/cloud/bigquery/query.py | 8 +++---- google/cloud/bigquery/table.py | 14 ++++++------ 6 files changed, 40 insertions(+), 41 deletions(-) diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py index 1fcfd4f50..8e60d8439 100644 --- a/google/cloud/__init__.py +++ b/google/cloud/__init__.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable - try: import pkg_resources @@ -23,5 +21,4 @@ except ImportError: import pkgutil - # Use Iterable instead of List, otherwise pytype (incorrectly) complains - __path__: Iterable[str] = pkgutil.extend_path(__path__, __name__) + __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 5f284c639..6df24a9e9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -556,6 +556,10 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": ParquetOptions, ) +OptionsType = Union[ + AvroOptions, BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions, +] + class HivePartitioningOptions(object): """[Beta] Options that configure hive partitioning. @@ -664,13 +668,14 @@ def source_format(self): return self._properties["sourceFormat"] @property - def options(self) -> Optional[Union[_OPTION_CLASSES]]: + def options(self) -> Optional[OptionsType]: """Source-specific options.""" for optcls in _OPTION_CLASSES: - if self.source_format == optcls._SOURCE_FORMAT: - options = optcls() - self._properties.setdefault(optcls._RESOURCE_NAME, {}) - options._properties = self._properties[optcls._RESOURCE_NAME] + # The code below is too much magic for mypy to handle. + if self.source_format == optcls._SOURCE_FORMAT: # type: ignore + options: OptionsType = optcls() # type: ignore + self._properties.setdefault(optcls._RESOURCE_NAME, {}) # type: ignore + options._properties = self._properties[optcls._RESOURCE_NAME] # type: ignore return options # No matching source format found. 
@@ -799,6 +804,13 @@ def schema(self): prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + @schema.setter + def schema(self, value): + prop = value + if value is not None: + prop = {"fields": [field.to_api_repr() for field in value]} + self._properties["schema"] = prop + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API @@ -816,13 +828,6 @@ def connection_id(self): def connection_id(self, value): self._properties["connectionId"] = value - @schema.setter - def schema(self, value): - prop = value - if value is not None: - prop = {"fields": [field.to_api_repr() for field in value]} - self._properties["schema"] = prop - @property def avro_options(self) -> Optional[AvroOptions]: """Additional properties to set if ``sourceFormat`` is set to AVRO. diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5cc086ad8..de028a178 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -921,7 +921,7 @@ def from_api_repr(cls, resource: dict) -> "_JobConfig": # cls is one of the job config subclasses that provides the job_type argument to # this base class on instantiation, thus missing-parameter warning is a false # positive here. - job_config = cls() # pytype: disable=missing-parameter + job_config = cls() # type: ignore # pytype: disable=missing-parameter job_config._properties = resource return job_config diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b85b34203..c9e80f62d 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -144,7 +144,7 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": args = ( int(stats.get(api_field, default_val)) - for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) # type: ignore ) return cls(*args) @@ -161,7 +161,7 @@ def __init__( statement_byte_budget: Optional[int] = None, key_result_statement: Optional[KeyResultStatementKind] = None, ): - self._properties = {} + self._properties: Dict[str, Any] = {} self.statement_timeout_ms = statement_timeout_ms self.statement_byte_budget = statement_byte_budget self.key_result_statement = key_result_statement @@ -193,9 +193,8 @@ def statement_timeout_ms(self) -> Union[int, None]: @statement_timeout_ms.setter def statement_timeout_ms(self, value: Union[int, None]): - if value is not None: - value = str(value) - self._properties["statementTimeoutMs"] = value + new_value = None if value is None else str(value) + self._properties["statementTimeoutMs"] = new_value @property def statement_byte_budget(self) -> Union[int, None]: @@ -207,9 +206,8 @@ def statement_byte_budget(self) -> Union[int, None]: @statement_byte_budget.setter def statement_byte_budget(self, value: Union[int, None]): - if value is not None: - value = str(value) - self._properties["statementByteBudget"] = value + new_value = None if value is None else str(value) + self._properties["statementByteBudget"] = new_value @property def key_result_statement(self) -> Union[KeyResultStatementKind, None]: @@ -666,9 +664,8 @@ def script_options(self) -> ScriptOptions: @script_options.setter def script_options(self, value: Union[ScriptOptions, None]): - if value is not None: - value = value.to_api_repr() - self._set_sub_prop("scriptOptions", value) + new_value = None if value is None else value.to_api_repr() + 
self._set_sub_prop("scriptOptions", new_value) def to_api_repr(self) -> dict: """Build an API representation of the query job config. @@ -1330,7 +1327,7 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): except exceptions.GoogleAPIError as exc: self.set_exception(exc) - def result( + def result( # type: ignore # (complaints about the overloaded signature) self, page_size: int = None, max_results: int = None, @@ -1400,7 +1397,7 @@ def result( retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: if job_retry is DEFAULT_JOB_RETRY: - job_retry = self._job_retry + job_retry = self._job_retry # type: ignore else: if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: raise TypeError( diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 708f5f47b..637be62be 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -367,14 +367,14 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": + def from_api_repr(cls, resource: dict) -> "_AbstractQueryParameter": """Factory: construct parameter from JSON resource. Args: resource (Dict): JSON mapping of parameter Returns: - google.cloud.bigquery.query.ScalarQueryParameter + A new instance of _AbstractQueryParameter subclass. """ raise NotImplementedError @@ -471,7 +471,7 @@ def to_api_repr(self) -> dict: converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) if converter is not None: value = converter(value) - resource = { + resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, } @@ -734,7 +734,7 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": struct_values = resource["parameterValue"]["structValues"] for key, value in struct_values.items(): type_ = types[key] - converted = None + converted: Optional[Union[ArrayQueryParameter, StructQueryParameter]] = None if type_ == "STRUCT": struct_resource = { "name": key, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 6cc651ff2..7182c2d2b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,7 +21,7 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, Optional, Tuple +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union import warnings try: @@ -130,7 +130,7 @@ def _view_use_legacy_sql_getter(table): class _TableBase: """Base class for Table-related classes with common functionality.""" - _PROPERTY_TO_API_FIELD = { + _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = { "dataset_id": ["tableReference", "datasetId"], "project": ["tableReference", "projectId"], "table_id": ["tableReference", "tableId"], @@ -807,7 +807,7 @@ def view_query(self): view_use_legacy_sql = property(_view_use_legacy_sql_getter) - @view_use_legacy_sql.setter + @view_use_legacy_sql.setter # type: ignore # (redefinition from above) def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") @@ -1746,7 +1746,7 @@ def to_arrow( progress_bar.close() finally: if owns_bqstorage_client: - bqstorage_client._transport.grpc_channel.close() + bqstorage_client._transport.grpc_channel.close() # type: ignore if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) @@ -1763,7 +1763,7 @@ def to_dataframe_iterable( self, bqstorage_client: "bigquery_storage.BigQueryReadClient" 
= None, dtypes: Dict[str, Any] = None, - max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2307,7 +2307,7 @@ def __repr__(self): key_vals = ["{}={}".format(key, val) for key, val in self._key()] return "PartitionRange({})".format(", ".join(key_vals)) - __hash__ = None + __hash__ = None # type: ignore class RangePartitioning(object): @@ -2387,7 +2387,7 @@ def __repr__(self): key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] return "RangePartitioning({})".format(", ".join(key_vals)) - __hash__ = None + __hash__ = None # type: ignore class TimePartitioningType(object): From 180b49bb8c2e05ad8164b6451cd980004f44a923 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 28 Oct 2021 18:52:03 +0300 Subject: [PATCH 06/18] Fix mypy false positive errors --- google/cloud/bigquery/client.py | 38 +++++++++++++++++------------- google/cloud/bigquery/job/base.py | 2 +- google/cloud/bigquery/job/query.py | 2 +- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 50612224d..a83bdc68f 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -61,7 +61,7 @@ DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, ) except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None + DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -538,7 +538,7 @@ def _ensure_bqstorage_client( bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, client_options=client_options, - client_info=client_info, + client_info=client_info, # type: ignore # (None is also accepted) ) return bqstorage_client @@ -1380,7 +1380,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def list_routines( @@ -1457,7 +1457,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def list_tables( @@ -1533,7 +1533,7 @@ def api_request(*args, **kwargs): max_results=max_results, page_size=page_size, ) - result.dataset = dataset + result.dataset = dataset # type: ignore return result def delete_dataset( @@ -1883,20 +1883,18 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource: dict) -> job.UnknownJob: + def job_from_resource( + self, resource: dict + ) -> Union[ + job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob, + ]: """Detect correct job type from resource and instantiate. Args: resource (Dict): one job resource from API response Returns: - Union[ \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]: - The job instance, constructed via the resource. + The job instance, constructed via the resource. 
""" config = resource.get("configuration", {}) if "load" in config: @@ -2079,7 +2077,11 @@ def get_job( timeout=timeout, ) - return self.job_from_resource(resource) + job_instance = self.job_from_resource(resource) # never an UnknownJob + + return typing.cast( + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, + ) def cancel_job( self, @@ -2153,7 +2155,11 @@ def cancel_job( timeout=timeout, ) - return self.job_from_resource(resource["job"]) + job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob + + return typing.cast( + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, + ) def list_jobs( self, @@ -2453,7 +2459,7 @@ def load_table_from_file( except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) - return self.job_from_resource(response.json()) + return typing.cast(LoadJob, self.job_from_resource(response.json())) def load_table_from_dataframe( self, diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index de028a178..b2167f0f2 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -696,7 +696,7 @@ def done( self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result( + def result( # type: ignore # (signature complaint) self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index c9e80f62d..dd9852195 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1448,7 +1448,7 @@ def do_get_result(): except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self + exc.query_job = self # type: ignore raise except requests.exceptions.Timeout as exc: raise concurrent.futures.TimeoutError from exc From e9badeb08885738c361314824eb9837c0a098994 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 30 Oct 2021 17:39:40 +0300 Subject: [PATCH 07/18] Simplify external config options instantiation --- google/cloud/bigquery/external_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 6df24a9e9..e6f6a97c3 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -674,8 +674,9 @@ def options(self) -> Optional[OptionsType]: # The code below is too much magic for mypy to handle. if self.source_format == optcls._SOURCE_FORMAT: # type: ignore options: OptionsType = optcls() # type: ignore - self._properties.setdefault(optcls._RESOURCE_NAME, {}) # type: ignore - options._properties = self._properties[optcls._RESOURCE_NAME] # type: ignore + options._properties = self._properties.setdefault( + optcls._RESOURCE_NAME, {} # type: ignore + ) return options # No matching source format found. From e847af4554471dba5af59bd9ce265ac2114e8322 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 30 Oct 2021 17:53:41 +0300 Subject: [PATCH 08/18] Do not ignore api-core in type checks More recent releases of google-api-core have typing enabled. 
--- google/cloud/bigquery/client.py | 6 +++--- google/cloud/bigquery/job/base.py | 4 ++-- google/cloud/bigquery/job/query.py | 4 ++-- google/cloud/bigquery/magics/magics.py | 6 +++--- google/cloud/bigquery/retry.py | 2 +- google/cloud/bigquery/table.py | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a83bdc68f..2e91d6537 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -47,9 +47,9 @@ from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload -import google.api_core.client_options # type: ignore -import google.api_core.exceptions as core_exceptions # type: ignore -from google.api_core.iam import Policy # type: ignore +import google.api_core.client_options +import google.api_core.exceptions as core_exceptions +from google.api_core.iam import Policy from google.api_core import page_iterator from google.api_core import retry as retries import google.cloud._helpers # type: ignore diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index b2167f0f2..97acab5d2 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -21,8 +21,8 @@ import typing from typing import Dict, Optional, Sequence -from google.api_core import exceptions # type: ignore -import google.api_core.future.polling # type: ignore +from google.api_core import exceptions +import google.api_core.future.polling from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index dd9852195..36e388238 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -20,8 +20,8 @@ import typing from typing import Any, Dict, Iterable, List, Optional, Union -from google.api_core import exceptions # type: ignore -from google.api_core.future import polling as polling_future # type: ignore +from google.api_core import exceptions +from google.api_core.future import polling as polling_future import requests from google.cloud.bigquery.dataset import Dataset diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 7b9aecbdd..1d8d8ed30 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -96,9 +96,9 @@ except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") -from google.api_core import client_info # type: ignore +from google.api_core import client_info from google.api_core import client_options -from google.api_core.exceptions import NotFound # type: ignore +from google.api_core.exceptions import NotFound import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset @@ -718,7 +718,7 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): raise customized_error from err try: - from google.api_core.gapic_v1 import client_info as gapic_client_info # type: ignore + from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: customized_error = ImportError( "Install the grpcio package to use the BigQuery Storage API." 
diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index c7aefe23e..254b26608 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.api_core import exceptions # type: ignore +from google.api_core import exceptions from google.api_core import retry from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 7182c2d2b..961920e05 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -48,8 +48,8 @@ except ImportError: # pragma: NO COVER pyarrow = None -import google.api_core.exceptions # type: ignore -from google.api_core.page_iterator import HTTPIterator # type: ignore +import google.api_core.exceptions +from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers From 195a0d68cadf1e27dc8b2acc9054f0d132ef327e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 30 Oct 2021 17:56:37 +0300 Subject: [PATCH 09/18] Remove unneeded __hash__ = None lines --- google/cloud/bigquery/table.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 961920e05..60c8593c7 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2307,8 +2307,6 @@ def __repr__(self): key_vals = ["{}={}".format(key, val) for key, val in self._key()] return "PartitionRange({})".format(", ".join(key_vals)) - __hash__ = None # type: ignore - class RangePartitioning(object): """Range-based partitioning configuration for a table. @@ -2387,8 +2385,6 @@ def __repr__(self): key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] return "RangePartitioning({})".format(", ".join(key_vals)) - __hash__ = None # type: ignore - class TimePartitioningType(object): """Specifies the type of time partitioning to perform.""" From 544d88f23c9c4c10fab33cb38cd75b8aa56dc279 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 30 Oct 2021 18:00:05 +0300 Subject: [PATCH 10/18] Use an alias for timeout type in client.py --- google/cloud/bigquery/client.py | 83 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2e91d6537..f738b5d56 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -112,6 +112,7 @@ pyarrow = _helpers.PYARROW_VERSIONS.try_import() PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] +TimeoutType = Union[float, None] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 @@ -260,7 +261,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -307,7 +308,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. 
@@ -373,7 +374,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -564,7 +565,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference, DatasetListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -639,7 +640,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. @@ -694,7 +695,7 @@ def create_table( table: Union[str, Table, TableReference, TableListItem], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -777,7 +778,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -821,7 +822,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -851,7 +852,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -884,7 +885,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -909,7 +910,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -952,7 +953,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -996,7 +997,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. 
@@ -1038,7 +1039,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1108,7 +1109,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1172,7 +1173,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1246,7 +1247,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1312,7 +1313,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1389,7 +1390,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1466,7 +1467,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1541,7 +1542,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, DatasetListItem, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1600,7 +1601,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1654,7 +1655,7 @@ def delete_job_metadata( project: Optional[str] = None, location: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1717,7 +1718,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. 
@@ -1771,7 +1772,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1824,7 +1825,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1911,7 +1912,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -2011,7 +2012,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2089,7 +2090,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2170,7 +2171,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2285,7 +2286,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2369,7 +2370,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2472,7 +2473,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2727,7 +2728,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -3010,7 +3011,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. 
@@ -3113,7 +3114,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. @@ -3211,7 +3212,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3493,7 +3494,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3628,7 +3629,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3678,7 +3679,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3790,7 +3791,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: Optional[float] = DEFAULT_TIMEOUT, + timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See From 3dde5150f4ef4246edcd9f9d9817bf89e5ad4cec Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sun, 31 Oct 2021 08:29:09 +0200 Subject: [PATCH 11/18] Fix PathLike subscription error in pre-Python 3.9 --- google/cloud/bigquery/client.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index f738b5d56..4add2cb94 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -111,9 +111,12 @@ pyarrow = _helpers.PYARROW_VERSIONS.try_import() -PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] TimeoutType = Union[float, None] +if typing.TYPE_CHECKING: + # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. + PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -3877,7 +3880,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path: PathType): + def schema_from_json(self, file_or_path: "PathType"): """Takes a file object or file path that contains json that describes a table schema. @@ -3890,7 +3893,9 @@ def schema_from_json(self, file_or_path: PathType): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json(self, schema_list: Sequence[SchemaField], destination: PathType): + def schema_to_json( + self, schema_list: Sequence[SchemaField], destination: "PathType" + ): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. 
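The ``TYPE_CHECKING`` guard in the patch above generalizes beyond ``PathType``. A minimal self-contained sketch of the pattern (names are illustrative, not part of the patch): subscripting ``os.PathLike`` raises ``TypeError`` at runtime before Python 3.9, so the alias is defined only for the type checker and referenced via a quoted (forward) annotation that the interpreter never evaluates:

    import os
    import typing
    from typing import Union

    if typing.TYPE_CHECKING:
        # Only type checkers evaluate this block, so the Python 3.9+
        # os.PathLike[str] subscription never runs on older interpreters.
        PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]

    def load_schema(file_or_path: "PathType") -> None:
        # The quoted annotation is a forward reference: mypy resolves it,
        # while at runtime it remains an unevaluated string.
        ...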
From 1fc18bc581ecac5508dc94f0bcd850224cbcc89d Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Sun, 31 Oct 2021 08:57:54 +0100
Subject: [PATCH 12/18] Fix a typo in docstring

Co-authored-by: Tim Swast
---
 google/cloud/bigquery/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 4add2cb94..75e2c73a5 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -2291,7 +2291,7 @@ def load_table_from_uri(
         retry: retries.Retry = DEFAULT_RETRY,
         timeout: TimeoutType = DEFAULT_TIMEOUT,
     ) -> job.LoadJob:
-        """Starts a job for loading data into a table from CloudStorage.
+        """Starts a job for loading data into a table from Cloud Storage.
 
         See
         https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload

From 913e29f044dfd588daea9873003d0250803ba020 Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Sun, 31 Oct 2021 09:59:22 +0200
Subject: [PATCH 13/18] Add mypy to the list of nox sessions to run

---
 noxfile.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/noxfile.py b/noxfile.py
index 2f4976c0c..922d2a513 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -42,6 +42,7 @@
     "lint",
     "lint_setup_py",
     "blacken",
+    "mypy",
     "pytype",
     "docs",
 ]

From 46ba334e57d0c95ea2354da5713d952a187fb146 Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Sun, 31 Oct 2021 11:55:16 +0200
Subject: [PATCH 14/18] Fix opentelemetry type error

The OpenTelemetry API has changed since the minimum version that the
BigQuery client currently supports, so we need to put an upper bound on
the OpenTelemetry version. In addition, the highest version we can
support does not yet ship type information, so its imports are ignored
in type checks.
---
 google/cloud/bigquery/opentelemetry_tracing.py | 4 ++--
 setup.py                                       | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py
index 7978a5ca3..74a8e8029 100644
--- a/google/cloud/bigquery/opentelemetry_tracing.py
+++ b/google/cloud/bigquery/opentelemetry_tracing.py
@@ -19,7 +19,7 @@
 logger = logging.getLogger(__name__)
 try:
     from opentelemetry import trace
-    from opentelemetry.instrumentation.utils import http_status_to_status_code
+    from opentelemetry.instrumentation.utils import http_status_to_canonical_code  # type: ignore
     from opentelemetry.trace.status import Status
 
     HAS_OPENTELEMETRY = True
@@ -81,7 +81,7 @@ def create_span(name, attributes=None, client=None, job_ref=None):
             yield span
         except GoogleAPICallError as error:
             if error.code is not None:
-                span.set_status(Status(http_status_to_status_code(error.code)))
+                span.set_status(Status(http_status_to_canonical_code(error.code)))
             raise

diff --git a/setup.py b/setup.py
index 95dad190a..ff97f229d 100644
--- a/setup.py
+++ b/setup.py
@@ -63,9 +63,11 @@
     "bignumeric_type": pyarrow_dep,
     "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
     "opentelemetry": [
-        "opentelemetry-api >= 0.11b0",
-        "opentelemetry-sdk >= 0.11b0",
-        "opentelemetry-instrumentation >= 0.11b0",
+        # The OpenTelemetry API was changed in v0.15b0; we need to cap the version.
+        # https://github.com/open-telemetry/opentelemetry-python/commit/f3cdfa2cdb9cb1c442189e2ead3788f45d92352d#diff-ebe3b7e2373ce704ff49dcab7a82d2270aa35c624337325cd0233e60e7a95a39R35
+        "opentelemetry-api >= 0.11b0, <0.15b0",
+        "opentelemetry-sdk >= 0.11b0, <0.15b0",
+        "opentelemetry-instrumentation >= 0.11b0, <0.15b0",
     ],
 }

From 54b45c208026cb5186d0ed266603a5b404c00444 Mon Sep 17 00:00:00 2001
From: Owl Bot
Date: Sun, 31 Oct 2021 10:09:51 +0000
Subject: [PATCH 15/18] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 samples/magics/noxfile.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py
index b008613f0..93a9122cc 100644
--- a/samples/magics/noxfile.py
+++ b/samples/magics/noxfile.py
@@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
 
 # DO NOT EDIT - automatically generated.
 # All versions used to test samples.
-ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"]
 
 # Any default versions that should be ignored.
 IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]:
     "True",
     "true",
 )
+
+# Error if a python version is missing
+nox.options.error_on_missing_interpreters = True
+
 #
 # Style Checks
 #

From 514efda4cc65a0aad946d4160aba4da769899620 Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Sun, 31 Oct 2021 17:31:19 +0200
Subject: [PATCH 16/18] Exclude type-checking code from coverage

---
 google/cloud/bigquery/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 75e2c73a5..64299f055 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -113,7 +113,7 @@
 
 TimeoutType = Union[float, None]
 
-if typing.TYPE_CHECKING:
+if typing.TYPE_CHECKING:  # pragma: NO COVER
     # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
     PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]

From 3effe3a2108bcd1d9e18a789848b0c5b97bf26ed Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Mon, 1 Nov 2021 22:12:43 +0200
Subject: [PATCH 17/18] Fix patching opentelemetry tracer provider

---
 tests/unit/test_client.py                | 7 +++++--
 tests/unit/test_opentelemetry_tracing.py | 9 ++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 48dacf7e2..0b4d463d3 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -786,7 +786,10 @@ def test_span_status_is_set(self):
         memory_exporter = InMemorySpanExporter()
         span_processor = SimpleExportSpanProcessor(memory_exporter)
         tracer_provider.add_span_processor(span_processor)
-        trace.set_tracer_provider(tracer_provider)
+
+        # OpenTelemetry API >= 0.12b0 does not allow overriding the tracer once
+        # initialized, thus directly override the internal global var.
+ tracer_patcher = mock.patch.object(trace, "_TRACER_PROVIDER", tracer_provider) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -797,7 +800,7 @@ def test_span_status_is_set(self): full_routine_id = "test-routine-project.test_routines.minimal_routine" routine = Routine(full_routine_id) - with pytest.raises(google.api_core.exceptions.AlreadyExists): + with pytest.raises(google.api_core.exceptions.AlreadyExists), tracer_patcher: client.create_routine(routine) span_list = memory_exporter.get_finished_spans() diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 726e3cf6f..133e53d92 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -44,9 +44,16 @@ def setup(): memory_exporter = InMemorySpanExporter() span_processor = SimpleExportSpanProcessor(memory_exporter) tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + + # OpenTelemetry API >= 0.12b0 does not allow overriding the tracer once + # initialized, thus directly override (and then restore) the internal global var. + orig_trace_provider = trace._TRACER_PROVIDER + trace._TRACER_PROVIDER = tracer_provider + yield memory_exporter + trace._TRACER_PROVIDER = orig_trace_provider + @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_opentelemetry_not_installed(setup, monkeypatch): From 94b58e3b8a48bb38a88f136c6a7612c8e758d152 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 4 Nov 2021 10:47:13 +0200 Subject: [PATCH 18/18] Adjust get_job() return type, ignore opentelemetry --- google/cloud/bigquery/client.py | 27 +++++-------------- .../cloud/bigquery/opentelemetry_tracing.py | 4 +-- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d8072632a..3e641e195 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2010,25 +2010,20 @@ def create_job( def get_job( self, - job_id: str, + job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: """Fetch a job for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: + Job identifier. Keyword Arguments: project (Optional[str]): @@ -2043,13 +2038,7 @@ def get_job( before using ``retry``. Returns: - Union[ \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]: - Job instance, based on the resource returned by the API. + Job instance, based on the resource returned by the API. 
""" extra_params = {"projection": "full"} @@ -2080,11 +2069,7 @@ def get_job( timeout=timeout, ) - job_instance = self.job_from_resource(resource) # never an UnknownJob - - return typing.cast( - Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, - ) + return self.job_from_resource(resource) def cancel_job( self, diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 74a8e8029..e1e1d2157 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -18,9 +18,9 @@ logger = logging.getLogger(__name__) try: - from opentelemetry import trace + from opentelemetry import trace # type: ignore from opentelemetry.instrumentation.utils import http_status_to_canonical_code # type: ignore - from opentelemetry.trace.status import Status + from opentelemetry.trace.status import Status # type: ignore HAS_OPENTELEMETRY = True _warned_telemetry = True